What is the fastest Python PDF library?

PDF Oxide is the fastest Python PDF library, with 0.8ms mean text extraction time — 5.8× faster than PyMuPDF (4.6ms) and 15× faster than pypdf (12.1ms). Benchmarked on 3,830 real-world PDFs with 100% pass rate.

Is PDF Oxide free for commercial use?

Yes. PDF Oxide is MIT licensed — free for all uses including commercial products, SaaS, and proprietary software. No license fees, no sales calls, no AGPL restrictions.

Can PDF Oxide handle scanned PDFs with OCR?

Yes. PDF Oxide includes built-in OCR via PaddleOCR and ONNX Runtime. No Tesseract installation needed — just pip install pdf_oxide and use extract_text_ocr(). Supports PP-OCRv3, v4, and v5 models.

Does PDF Oxide support XFA forms?

Yes. PDF Oxide is the only Python PDF library that can detect, analyze, and extract data from XFA forms (XML Forms Architecture). PyMuPDF, pypdf, pdfplumber, and pdfminer cannot read XFA form data.

How does PDF Oxide compare to PyMuPDF?

PDF Oxide is 5.8× faster than PyMuPDF (0.8ms vs 4.6ms mean), has a 100% pass rate vs 99.3%, and is MIT licensed vs PyMuPDF's AGPL-3.0. PDF Oxide also has built-in Markdown/HTML output and XFA form support that PyMuPDF lacks.

Can PDF Oxide convert PDF to Markdown?

Yes. PDF Oxide has built-in PDF to Markdown conversion with heading detection, table preservation, and list formatting — ideal for LLM and RAG pipelines. No separate package needed, unlike PyMuPDF which requires pymupdf4llm (69× slower).

Извлечение данных форм

PDF Oxide извлекает интерактивные поля форм (AcroForms) из PDF-документов: текстовые поля, флажки, переключатели, поля выбора и подписи. Извлечённые данные можно экспортировать в форматы FDF или XFDF для обмена с другими приложениями. Также поддерживается анализ и конвертация XFA-форм (XML Forms Architecture).

Быстрый пример

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
fields = doc.get_form_fields()
for field in fields:
    print(f"{field.name} ({field.field_type}): {field.value}")

Node.js

const { PdfDocument } = require("pdf-oxide");

const doc = new PdfDocument("form.pdf");
const fields = doc.getFormFields();
for (const field of fields) {
  console.log(`${field.name} (${field.fieldType}): ${field.value}`);
}
doc.close();

import pdfoxide "github.com/yfedoseev/pdf_oxide/go"

doc, _ := pdfoxide.Open("form.pdf")
defer doc.Close()
fields, _ := doc.FormFields()
for _, field := range fields {
    fmt.Printf("%s (%s): %s\n", field.Name, field.FieldType, field.Value)
}

using PdfOxide.Core;

using var doc = PdfDocument.Open("form.pdf");
var fields = doc.GetFormFields();
foreach (var field in fields)
{
    Console.WriteLine($"{field.Name} ({field.FieldType}): {field.Value}");
}

WASM

const doc = new WasmPdfDocument(bytes);
const fields = doc.getFormFields();
for (const field of fields) {
    console.log(`${field.name} (${field.fieldType}): ${field.value}`);
}

Rust

use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
for field in &fields {
    println!("{} ({:?}): {:?}", field.full_name, field.field_type, field.value);
}

Java

import fyi.oxide.pdf.PdfDocument;
import fyi.oxide.pdf.form.FormField;

try (PdfDocument doc = PdfDocument.open("form.pdf")) {
    for (FormField field : doc.formFields()) {
        System.out.printf("%s (%s): %s%n",
            field.name(), field.type(), field.value().orElse(""));
    }
}

Ruby

require 'pdf_oxide'

PdfOxide::PdfDocument.open('form.pdf') do |doc|
  doc.form_fields.each do |field|
    puts "#{field[:name]} (#{field[:type]}): #{field[:value]}"
  end
end

C++

#include <pdf_oxide/pdf_oxide.hpp>

auto doc = pdf_oxide::Document::open("form.pdf");
for (const auto& field : doc.get_form_fields()) {
    std::cout << field.name << " (" << field.type << "): " << field.value << "\n";
}

Swift

import PdfOxide

let doc = try Document.open("form.pdf")
for field in try doc.formFields() {
    print("\(field.name) (\(field.type)): \(field.value)")
}

Kotlin

import fyi.oxide.pdf.PdfDocument

PdfDocument.open("form.pdf").use { doc ->
    doc.formFields().forEach { field ->
        println("${field.name()} (${field.type()}): ${field.valueOrNull() ?: ""}")
    }
}

Dart

import 'package:pdf_oxide/pdf_oxide.dart';

final doc = PdfDocument.open('form.pdf');
for (final field in doc.getFormFields()) {
  print('${field.name} (${field.type}): ${field.value}');
}

library(pdfoxide)

doc <- pdf_open("form.pdf")
for (field in pdf_get_form_fields(doc)) {
  cat(sprintf("%s (%s): %s\n", field$name, field$type, field$value))
}

Julia

using PdfOxide

doc = open_document("form.pdf")
for field in get_form_fields(doc)
    println("$(field.name) ($(field.type)): $(field.value)")
end

Zig

const pdf_oxide = @import("pdf_oxide");
const a = std.heap.page_allocator;

var doc = try pdf_oxide.Document.open("form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
    const name = try fields.getName(a, i);
    const ftype = try fields.getType(a, i);
    const value = try fields.getValue(a, i);
    std.debug.print("{s} ({s}): {s}\n", .{ name, ftype, value });
}

Scala

import fyi.oxide.pdf.{PdfDocument, valueOption}
import scala.util.Using

Using.resource(PdfDocument.open("form.pdf")) { doc =>
  doc.formFieldsSeq.foreach { field =>
    println(s"${field.name} (${field.`type`}): ${field.valueOption.getOrElse("")}")
  }
}

Clojure

(require '[pdf-oxide.core :as pdf])

(with-open [doc (pdf/open "form.pdf")]
  (doseq [field (pdf/form-fields doc)]
    (println (format "%s (%s): %s"
                     (.name field) (.type field) (.orElse (.value field) "")))))

Objective-C

#import "POXPdfOxide.h"
NSError *err = nil;

POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
    NSLog(@"%@ (%@): %@", field.name, field.type, field.value);
}

Elixir

{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
Enum.each(fields, fn field ->
  IO.puts("#{field.name} (#{field.type}): #{field.value}")
end)

Миграция с PyMuPDF get_form_fields()

Если вы переходите с PyMuPDF, API схожи по структуре, но PDF Oxide возвращает более богатые данные и поддерживает XFA-формы:

PyMuPDF:

import fitz

doc = fitz.open("form.pdf")
# Returns dict of {field_name: field_value} — loses type info
fields = doc.get_form_fields()

# Or iterate widgets for more detail
for page in doc:
    for widget in page.widgets():
        print(widget.field_name, widget.field_value)

PDF Oxide:

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
# Returns structured objects with name, value, type, options, rect
fields = doc.get_form_fields()
for field in fields:
    print(f"{field.name} ({field.field_type}): {field.value}")

# Also handles XFA forms that PyMuPDF cannot read
xfa = doc.has_xfa()

Ключевые отличия:

PDF Oxide возвращает структурированные объекты полей (а не просто словарь)
Содержит тип поля, ограничивающий прямоугольник и варианты для полей выбора
Поддерживает XFA-формы — get_form_fields() из PyMuPDF возвращает пустой результат для PDF только с XFA
Экспорт в FDF/XFDF для обмена данными форм между системами

Полное руководство по миграции с PyMuPDF, pypdf, pdfplumber и pdfminer см. в разделе Миграция на PDF Oxide.

Чтение полей формы

Получение всех полей

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("tax-form.pdf")
fields = doc.get_form_fields()

for field in fields:
    print(f"Name: {field.name}")
    print(f"  Type: {field.field_type}")
    print(f"  Value: {field.value}")
    print(f"  Required: {field.is_required}")
    print(f"  Read-only: {field.is_readonly}")
    if field.max_length:
        print(f"  Max length: {field.max_length}")

Node.js

const doc = new PdfDocument("tax-form.pdf");
const fields = doc.getFormFields();

for (const field of fields) {
  console.log(`Name: ${field.name}`);
  console.log(`  Type: ${field.fieldType}`);
  console.log(`  Value: ${field.value}`);
}
doc.close();

doc, _ := pdfoxide.Open("tax-form.pdf")
defer doc.Close()
fields, _ := doc.FormFields()

for _, field := range fields {
    fmt.Printf("Name: %s\n", field.Name)
    fmt.Printf("  Type: %s\n", field.FieldType)
    fmt.Printf("  Value: %s\n", field.Value)
}

using var doc = PdfDocument.Open("tax-form.pdf");
var fields = doc.GetFormFields();

foreach (var field in fields)
{
    Console.WriteLine($"Name: {field.Name}");
    Console.WriteLine($"  Type: {field.FieldType}");
    Console.WriteLine($"  Value: {field.Value}");
}

WASM

const doc = new WasmPdfDocument(bytes);
const fields = doc.getFormFields();

for (const field of fields) {
    console.log(`Name: ${field.name}`);
    console.log(`  Type: ${field.fieldType}`);
    console.log(`  Value: ${field.value}`);
    console.log(`  Flags: ${field.flags}`);
}

Rust

use pdf_oxide::extractors::{FormExtractor, FieldType};
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("tax-form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;

for field in &fields {
    let type_str = match &field.field_type {
        FieldType::Button => "Button",
        FieldType::Text => "Text",
        FieldType::Choice => "Choice",
        FieldType::Signature => "Signature",
        FieldType::Unknown(s) => s.as_str(),
    };

    println!("[{}] {} = {:?}", type_str, field.full_name, field.value);

    if let Some(tooltip) = &field.tooltip {
        println!("  Tooltip: {}", tooltip);
    }
    if let Some(bounds) = &field.bounds {
        println!("  Bounds: [{:.1}, {:.1}, {:.1}, {:.1}]",
            bounds[0], bounds[1], bounds[2], bounds[3]);
    }
}

Java

import fyi.oxide.pdf.PdfDocument;
import fyi.oxide.pdf.form.FormField;

try (PdfDocument doc = PdfDocument.open("tax-form.pdf")) {
    for (FormField field : doc.formFields()) {
        System.out.println("Name: " + field.name());
        System.out.println("  Type: " + field.type());
        System.out.println("  Value: " + field.value().orElse(""));
    }
}

Ruby

PdfOxide::PdfDocument.open('tax-form.pdf') do |doc|
  doc.form_fields.each do |field|
    puts "Name: #{field[:name]}"
    puts "  Type: #{field[:type]}"
    puts "  Value: #{field[:value]}"
  end
end

C++

auto doc = pdf_oxide::Document::open("tax-form.pdf");
for (const auto& field : doc.get_form_fields()) {
    std::cout << "Name: " << field.name << "\n";
    std::cout << "  Type: " << field.type << "\n";
    std::cout << "  Value: " << field.value << "\n";
    std::cout << "  Read-only: " << field.readonly << "\n";
    std::cout << "  Required: " << field.required << "\n";
}

Swift

let doc = try Document.open("tax-form.pdf")
for field in try doc.formFields() {
    print("Name: \(field.name)")
    print("  Type: \(field.type)")
    print("  Value: \(field.value)")
    print("  Read-only: \(field.readonly)")
    print("  Required: \(field.required)")
}

Kotlin

PdfDocument.open("tax-form.pdf").use { doc ->
    doc.formFields().forEach { field ->
        println("Name: ${field.name()}")
        println("  Type: ${field.type()}")
        println("  Value: ${field.valueOrNull() ?: ""}")
    }
}

Dart

final doc = PdfDocument.open('tax-form.pdf');
for (final field in doc.getFormFields()) {
  print('Name: ${field.name}');
  print('  Type: ${field.type}');
  print('  Value: ${field.value}');
  print('  Read-only: ${field.readonly}');
  print('  Required: ${field.required}');
}

doc <- pdf_open("tax-form.pdf")
for (field in pdf_get_form_fields(doc)) {
  cat(sprintf("Name: %s\n", field$name))
  cat(sprintf("  Type: %s\n", field$type))
  cat(sprintf("  Value: %s\n", field$value))
  cat(sprintf("  Read-only: %s\n", field$readonly))
  cat(sprintf("  Required: %s\n", field$required))
}

Julia

doc = open_document("tax-form.pdf")
for field in get_form_fields(doc)
    println("Name: $(field.name)")
    println("  Type: $(field.type)")
    println("  Value: $(field.value)")
    println("  Read-only: $(field.readonly)")
    println("  Required: $(field.required)")
end

Zig

var doc = try pdf_oxide.Document.open("tax-form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
    const name = try fields.getName(a, i);
    const ftype = try fields.getType(a, i);
    const value = try fields.getValue(a, i);
    std.debug.print("Name: {s}\n  Type: {s}\n  Value: {s}\n", .{ name, ftype, value });
    std.debug.print("  Read-only: {}\n  Required: {}\n", .{
        try fields.isReadonly(i), try fields.isRequired(i),
    });
}

Scala

Using.resource(PdfDocument.open("tax-form.pdf")) { doc =>
  doc.formFieldsSeq.foreach { field =>
    println(s"Name: ${field.name}")
    println(s"  Type: ${field.`type`}")
    println(s"  Value: ${field.valueOption.getOrElse("")}")
  }
}

Clojure

(with-open [doc (pdf/open "tax-form.pdf")]
  (doseq [field (pdf/form-fields doc)]
    (println "Name:" (.name field))
    (println "  Type:" (.type field))
    (println "  Value:" (.orElse (.value field) ""))))

Objective-C

POXDocument *doc = [POXDocument openPath:@"tax-form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
    NSLog(@"Name: %@", field.name);
    NSLog(@"  Type: %@", field.type);
    NSLog(@"  Value: %@", field.value);
    NSLog(@"  Read-only: %d", field.readonly);
    NSLog(@"  Required: %d", field.required);
}

Elixir

{:ok, doc} = PdfOxide.open("tax-form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
Enum.each(fields, fn field ->
  IO.puts("Name: #{field.name}")
  IO.puts("  Type: #{field.type}")
  IO.puts("  Value: #{field.value}")
  IO.puts("  Read-only: #{field.read_only}")
  IO.puts("  Required: #{field.required}")
end)

Получение значения конкретного поля

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

name = doc.get_form_field_value("employee_name")
ssn = doc.get_form_field_value("ssn")
agreed = doc.get_form_field_value("agree_to_terms")

print(f"Name: {name}")       # "John Doe"
print(f"SSN: {ssn}")         # "123-45-6789"
print(f"Agreed: {agreed}")   # True

WASM

const doc = new WasmPdfDocument(bytes);

const name = doc.getFormFieldValue("employee_name");
const ssn = doc.getFormFieldValue("ssn");
const agreed = doc.getFormFieldValue("agree_to_terms");

console.log(`Name: ${name}`);     // "John Doe"
console.log(`SSN: ${ssn}`);       // "123-45-6789"
console.log(`Agreed: ${agreed}`); // true

Rust

use pdf_oxide::editor::{DocumentEditor, EditableDocument};

let mut editor = DocumentEditor::open("form.pdf")?;

if let Some(value) = editor.get_form_field_value("employee_name")? {
    println!("Name: {:?}", value);
}

Заполнение форм

Установка значений полей

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

# Set text fields
doc.set_form_field_value("full_name", "Jane Doe")
doc.set_form_field_value("email", "jane@example.com")

# Set checkboxes
doc.set_form_field_value("agree_to_terms", True)

# Save the filled form
doc.save("filled_form.pdf")

WASM

const doc = new WasmPdfDocument(bytes);

// Set text fields
doc.setFormFieldValue("full_name", "Jane Doe");
doc.setFormFieldValue("email", "jane@example.com");

// Set checkboxes
doc.setFormFieldValue("agree_to_terms", true);

// Save the filled form
const filledBytes = doc.save();

Rust

use pdf_oxide::editor::{DocumentEditor, EditableDocument, FormFieldValue};

let mut editor = DocumentEditor::open("form.pdf")?;

// Set text fields
editor.set_form_field_value("full_name", FormFieldValue::Text("Jane Doe".into()))?;
editor.set_form_field_value("email", FormFieldValue::Text("jane@example.com".into()))?;

// Set checkboxes
editor.set_form_field_value("agree_to_terms", FormFieldValue::Boolean(true))?;

// Set choice fields
editor.set_form_field_value("state", FormFieldValue::Choice("California".into()))?;

editor.save("filled_form.pdf")?;

Java

import fyi.oxide.pdf.DocumentEditor;

try (DocumentEditor editor = DocumentEditor.open("form.pdf")) {
    // Set text fields
    editor.setFormField("full_name", "Jane Doe");
    editor.setFormField("email", "jane@example.com");

    // Set checkboxes
    editor.setFormField("agree_to_terms", true);

    editor.saveTo(java.nio.file.Path.of("filled_form.pdf"));
}

Ruby

PdfOxide::DocumentEditor.open('form.pdf') do |editor|
  # Set text fields
  editor.set_form_field('full_name', 'Jane Doe')
  editor.set_form_field('email', 'jane@example.com')

  # Set checkboxes
  editor.set_form_field('agree_to_terms', true)

  editor.save_to('filled_form.pdf')
end

C++

auto editor = pdf_oxide::DocumentEditor::open("form.pdf");

// Set text fields
editor.set_form_field_value("full_name", "Jane Doe");
editor.set_form_field_value("email", "jane@example.com");

// Set checkboxes
editor.set_form_field_value("agree_to_terms", "true");

editor.save("filled_form.pdf");

Swift

let editor = try DocumentEditor.openEditor("form.pdf")

// Set text fields
try editor.setFormFieldValue("full_name", "Jane Doe")
try editor.setFormFieldValue("email", "jane@example.com")

// Set checkboxes
try editor.setFormFieldValue("agree_to_terms", "true")

try editor.save("filled_form.pdf")

Kotlin

import fyi.oxide.pdf.DocumentEditor

DocumentEditor.open("form.pdf").use { editor ->
    // Set text fields
    editor.setFormField("full_name", "Jane Doe")
    editor.setFormField("email", "jane@example.com")

    // Set checkboxes
    editor.setFormField("agree_to_terms", true)

    editor.saveTo(java.nio.file.Path.of("filled_form.pdf"))
}

Dart

final editor = DocumentEditor.open('form.pdf');

// Set text fields
editor.setFormFieldValue('full_name', 'Jane Doe');
editor.setFormFieldValue('email', 'jane@example.com');

// Set checkboxes
editor.setFormFieldValue('agree_to_terms', 'true');

editor.save('filled_form.pdf');

editor <- pdf_editor_open("form.pdf")

# Set text fields
pdf_editor_set_form_field_value(editor, "full_name", "Jane Doe")
pdf_editor_set_form_field_value(editor, "email", "jane@example.com")

# Set checkboxes
pdf_editor_set_form_field_value(editor, "agree_to_terms", "true")

pdf_editor_save(editor, "filled_form.pdf")

Julia

editor = open_editor("form.pdf")

# Set text fields
set_form_field_value(editor, "full_name", "Jane Doe")
set_form_field_value(editor, "email", "jane@example.com")

# Set checkboxes
set_form_field_value(editor, "agree_to_terms", "true")

save(editor, "filled_form.pdf")

Zig

var editor = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor.deinit();

// Set text fields
try editor.setFormFieldValue("full_name", "Jane Doe");
try editor.setFormFieldValue("email", "jane@example.com");

// Set checkboxes
try editor.setFormFieldValue("agree_to_terms", "true");

try editor.save("filled_form.pdf");

Scala

import fyi.oxide.pdf.DocumentEditor
import scala.util.Using

Using.resource(DocumentEditor.open("form.pdf")) { editor =>
  // Set text fields
  editor.setFormField("full_name", "Jane Doe")
  editor.setFormField("email", "jane@example.com")

  // Set checkboxes
  editor.setFormField("agree_to_terms", true)

  editor.saveTo(java.nio.file.Path.of("filled_form.pdf"))
}

Clojure

(with-open [editor (pdf/editor "form.pdf")]
  ;; Set text fields
  (.setFormField editor "full_name" "Jane Doe")
  (.setFormField editor "email" "jane@example.com")

  ;; Set checkboxes
  (.setFormField editor "agree_to_terms" true)

  (.saveTo editor (java.nio.file.Path/of "filled_form.pdf" (make-array String 0))))

Objective-C

POXDocumentEditor *editor = [POXDocumentEditor openEditor:@"form.pdf" error:&err];

// Set text fields
[editor setFormField:@"full_name" value:@"Jane Doe" error:&err];
[editor setFormField:@"email" value:@"jane@example.com" error:&err];

// Set checkboxes
[editor setFormField:@"agree_to_terms" value:@"true" error:&err];

[editor saveToPath:@"filled_form.pdf" error:&err];

Elixir

{:ok, editor} = PdfOxide.open_editor("form.pdf")

# Set text fields
PdfOxide.set_form_field_value(editor, "full_name", "Jane Doe")
PdfOxide.set_form_field_value(editor, "email", "jane@example.com")

# Set checkboxes
PdfOxide.set_form_field_value(editor, "agree_to_terms", "true")

PdfOxide.editor_save(editor, "filled_form.pdf")

Экспорт данных форм

Экспортируйте данные полей формы в форматы FDF или XFDF для обмена с другими приложениями.

Экспорт в FDF

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
doc.export_form_data("form_data.fdf")

WASM

const doc = new WasmPdfDocument(bytes);
const fdfBytes = doc.exportFormData("fdf");
// fdfBytes is a Uint8Array

Rust

use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
let fdf_bytes = FormExtractor::export_fdf(&mut doc, fields)?;
std::fs::write("form_data.fdf", &fdf_bytes)?;

C++

auto doc = pdf_oxide::Document::open("form.pdf");
auto fdf = doc.export_form_data_to_bytes(0); // 0 = FDF
std::ofstream("form_data.fdf", std::ios::binary)
    .write(reinterpret_cast<const char*>(fdf.data()), fdf.size());

Swift

let doc = try Document.open("form.pdf")
let fdf = try doc.exportFormData(formatType: 0) // 0 = FDF
try Data(fdf).write(to: URL(fileURLWithPath: "form_data.fdf"))

Dart

final doc = PdfDocument.open('form.pdf');
final fdf = doc.exportFormDataToBytes(0); // 0 = FDF
File('form_data.fdf').writeAsBytesSync(fdf);

doc <- pdf_open("form.pdf")
fdf <- pdf_export_form_data_to_bytes(doc, format_type = 0L) # 0 = FDF
writeBin(fdf, "form_data.fdf")

Julia

doc = open_document("form.pdf")
fdf = export_form_data_to_bytes(doc, 0) # 0 = FDF
write("form_data.fdf", fdf)

Zig

var doc = try pdf_oxide.Document.open("form.pdf");
const fdf = try doc.exportFormDataToBytes(a, 0); // 0 = FDF
defer a.free(fdf);
try std.fs.cwd().writeFile(.{ .sub_path = "form_data.fdf", .data = fdf });

Objective-C

POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
NSData *fdf = [doc exportFormDataToBytes:0 error:&err]; // 0 = FDF
[fdf writeToFile:@"form_data.fdf" atomically:YES];

Elixir

{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fdf} = PdfOxide.export_form_data_to_bytes(doc, 0) # 0 = FDF
File.write!("form_data.fdf", fdf)

Экспорт в XFDF

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
doc.export_form_data("form_data.xfdf", format="xfdf")

WASM

const doc = new WasmPdfDocument(bytes);
const xfdfBytes = doc.exportFormData("xfdf");

Rust

use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
let xfdf = FormExtractor::export_xfdf(&mut doc, fields)?;
std::fs::write("form_data.xfdf", &xfdf)?;

C++

auto doc = pdf_oxide::Document::open("form.pdf");
auto xfdf = doc.export_form_data_to_bytes(1); // 1 = XFDF
std::ofstream("form_data.xfdf", std::ios::binary)
    .write(reinterpret_cast<const char*>(xfdf.data()), xfdf.size());

Swift

let doc = try Document.open("form.pdf")
let xfdf = try doc.exportFormData(formatType: 1) // 1 = XFDF
try Data(xfdf).write(to: URL(fileURLWithPath: "form_data.xfdf"))

Dart

final doc = PdfDocument.open('form.pdf');
final xfdf = doc.exportFormDataToBytes(1); // 1 = XFDF
File('form_data.xfdf').writeAsBytesSync(xfdf);

doc <- pdf_open("form.pdf")
xfdf <- pdf_export_form_data_to_bytes(doc, format_type = 1L) # 1 = XFDF
writeBin(xfdf, "form_data.xfdf")

Julia

doc = open_document("form.pdf")
xfdf = export_form_data_to_bytes(doc, 1) # 1 = XFDF
write("form_data.xfdf", xfdf)

Zig

var doc = try pdf_oxide.Document.open("form.pdf");
const xfdf = try doc.exportFormDataToBytes(a, 1); // 1 = XFDF
defer a.free(xfdf);
try std.fs.cwd().writeFile(.{ .sub_path = "form_data.xfdf", .data = xfdf });

Objective-C

POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
NSData *xfdf = [doc exportFormDataToBytes:1 error:&err]; // 1 = XFDF
[xfdf writeToFile:@"form_data.xfdf" atomically:YES];

Elixir

{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, xfdf} = PdfOxide.export_form_data_to_bytes(doc, 1) # 1 = XFDF
File.write!("form_data.xfdf", xfdf)

Об импорте данных

Экспорт — это сторона чтения в рамках цикла данных. Запись — загрузка данных FDF/XFDF обратно в PDF — описана в документации по редактированию. В C ABI и привязке Swift объявлены pdf_editor_import_fdf_bytes, pdf_editor_import_xfdf_bytes, pdf_form_import_from_file и pdf_document_import_form_data, однако в v0.3.69 они возвращают статус Unsupported. Рекомендуемый переносимый подход — вручную разобрать FDF/XFDF и вызвать set_form_field_value. Рабочие примеры импорта см. в разделе Редактирование полей форм → Импорт данных формы.

Поля форм в Markdown и HTML

По умолчанию значения полей формы включаются при конвертации в Markdown и HTML. Управляйте этим поведением с помощью параметра include_form_fields.

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

# Include form field values (default)
md = doc.to_markdown(0, include_form_fields=True)

# Exclude form fields
md = doc.to_markdown(0, include_form_fields=False)

WASM

const doc = new WasmPdfDocument(bytes);

// Include form fields (default: true)
const md = doc.toMarkdown(0, true, true, true);

// Exclude form fields (4th parameter)
const md2 = doc.toMarkdown(0, true, true, false);

Rust

use pdf_oxide::PdfDocument;
use pdf_oxide::converters::ConversionOptions;

let doc = PdfDocument::open("form.pdf")?;
let options = ConversionOptions {
    include_form_fields: true,
    ..Default::default()
};
let md = doc.to_markdown(0, &options)?;

Уплощение форм

Преобразует поля формы в статическое содержимое страницы, делая их нередактируемыми. Удобно для создания финальных версий PDF-документов.

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

# Flatten all form fields
doc.flatten_forms()
doc.save("flattened.pdf")

# Or flatten a single page
doc2 = PdfDocument("form.pdf")
doc2.flatten_forms_on_page(0)
doc2.save("flattened_page0.pdf")

WASM

const doc = new WasmPdfDocument(bytes);

// Flatten all form fields
doc.flattenForms();
const flattened = doc.save();

// Or flatten a single page
const doc2 = new WasmPdfDocument(bytes);
doc2.flattenFormsOnPage(0);
const flattened2 = doc2.save();

Rust

use pdf_oxide::Pdf;

let mut pdf = Pdf::open("form.pdf")?;

// Mark a specific page for flattening
pdf.flatten_page_annotations(0);
pdf.save("flattened.pdf")?;

// Or flatten all pages
let mut pdf2 = Pdf::open("form.pdf")?;
pdf2.flatten_all_annotations();
pdf2.save("flattened_all.pdf")?;

C++

auto editor = pdf_oxide::DocumentEditor::open("form.pdf");

// Flatten all form fields
editor.flatten_forms();
editor.save("flattened.pdf");

// Or flatten a single page
auto editor2 = pdf_oxide::DocumentEditor::open("form.pdf");
editor2.flatten_forms_on_page(0);
editor2.save("flattened_page0.pdf");

Swift

let editor = try DocumentEditor.openEditor("form.pdf")

// Flatten all form fields
try editor.flattenForms()
try editor.save("flattened.pdf")

// Or flatten a single page
let editor2 = try DocumentEditor.openEditor("form.pdf")
try editor2.flattenFormsOnPage(0)
try editor2.save("flattened_page0.pdf")

Dart

final editor = DocumentEditor.open('form.pdf');

// Flatten all form fields
editor.flattenForms();
editor.save('flattened.pdf');

// Or flatten a single page
final editor2 = DocumentEditor.open('form.pdf');
editor2.flattenFormsOnPage(0);
editor2.save('flattened_page0.pdf');

editor <- pdf_editor_open("form.pdf")

# Flatten all form fields
pdf_editor_flatten_forms(editor)
pdf_editor_save(editor, "flattened.pdf")

# Or flatten a single page
editor2 <- pdf_editor_open("form.pdf")
pdf_editor_flatten_forms_on_page(editor2, 0)
pdf_editor_save(editor2, "flattened_page0.pdf")

Julia

editor = open_editor("form.pdf")

# Flatten all form fields
flatten_forms(editor)
save(editor, "flattened.pdf")

# Or flatten a single page
editor2 = open_editor("form.pdf")
flatten_forms_on_page(editor2, 0)
save(editor2, "flattened_page0.pdf")

Zig

var editor = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor.deinit();

// Flatten all form fields
try editor.flattenForms();
try editor.save("flattened.pdf");

// Or flatten a single page
var editor2 = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor2.deinit();
try editor2.flattenFormsOnPage(0);
try editor2.save("flattened_page0.pdf");

Objective-C

POXDocumentEditor *editor = [POXDocumentEditor openEditor:@"form.pdf" error:&err];

// Flatten all form fields
[editor flattenForms:&err];
[editor saveToPath:@"flattened.pdf" error:&err];

// Or flatten a single page
POXDocumentEditor *editor2 = [POXDocumentEditor openEditor:@"form.pdf" error:&err];
[editor2 flattenFormsOnPage:0 error:&err];
[editor2 saveToPath:@"flattened_page0.pdf" error:&err];

Elixir

{:ok, editor} = PdfOxide.open_editor("form.pdf")

# Flatten all form fields
PdfOxide.flatten_forms(editor)
PdfOxide.editor_save(editor, "flattened.pdf")

# Or flatten a single page
{:ok, editor2} = PdfOxide.open_editor("form.pdf")
PdfOxide.flatten_forms_on_page(editor2, 0)
PdfOxide.editor_save(editor2, "flattened_page0.pdf")

XFA-формы

Анализируйте содержимое XFA-форм (XML Forms Architecture). XFA-формы используют XML-шаблоны вместо полей AcroForm и широко применяются в государственных и корпоративных документах.

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("xfa-form.pdf")
if doc.has_xfa():
    print("This document contains an XFA form")
    fields = doc.get_form_fields()  # Extracts AcroForm fallback fields
    for field in fields:
        print(f"  {field.name}: {field.value}")

Node.js

const doc = new PdfDocument("xfa-form.pdf");
if (doc.hasXFA()) {
  console.log("This document contains an XFA form");
  const fields = doc.getFormFields();
  for (const field of fields) {
    console.log(`  ${field.name}: ${field.value}`);
  }
}
doc.close();

doc, _ := pdfoxide.Open("xfa-form.pdf")
defer doc.Close()
if doc.HasXfa() {
    fmt.Println("This document contains an XFA form")
    fields, _ := doc.FormFields()
    for _, field := range fields {
        fmt.Printf("  %s: %s\n", field.Name, field.Value)
    }
}

using var doc = PdfDocument.Open("xfa-form.pdf");
if (doc.HasXfa)
{
    Console.WriteLine("This document contains an XFA form");
    var fields = doc.GetFormFields();
    foreach (var field in fields)
    {
        Console.WriteLine($"  {field.Name}: {field.Value}");
    }
}

WASM

const doc = new WasmPdfDocument(bytes);
if (doc.hasXfa()) {
    console.log("This document contains an XFA form");
    const fields = doc.getFormFields(); // AcroForm fallback fields
    for (const field of fields) {
        console.log(`  ${field.name}: ${field.value}`);
    }
}

Rust

use pdf_oxide::xfa::analyze_xfa_document;
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("xfa-form.pdf")?;
let analysis = analyze_xfa_document(&mut doc)?;
println!("XFA form detected: {} fields", analysis.fields.len());
for field in &analysis.fields {
    println!("  {} ({:?})", field.name, field.field_type);
}

C++

auto doc = pdf_oxide::Document::open("xfa-form.pdf");
if (doc.has_xfa()) {
    std::cout << "This document contains an XFA form\n";
    for (const auto& field : doc.get_form_fields()) { // AcroForm fallback
        std::cout << "  " << field.name << ": " << field.value << "\n";
    }
}

Swift

let doc = try Document.open("xfa-form.pdf")
if try doc.hasXfa() {
    print("This document contains an XFA form")
    for field in try doc.formFields() { // AcroForm fallback
        print("  \(field.name): \(field.value)")
    }
}

Dart

final doc = PdfDocument.open('xfa-form.pdf');
if (doc.hasXfa()) {
  print('This document contains an XFA form');
  for (final field in doc.getFormFields()) { // AcroForm fallback
    print('  ${field.name}: ${field.value}');
  }
}

doc <- pdf_open("xfa-form.pdf")
if (pdf_has_xfa(doc)) {
  cat("This document contains an XFA form\n")
  for (field in pdf_get_form_fields(doc)) { # AcroForm fallback
    cat(sprintf("  %s: %s\n", field$name, field$value))
  }
}

Julia

doc = open_document("xfa-form.pdf")
if has_xfa(doc)
    println("This document contains an XFA form")
    for field in get_form_fields(doc) # AcroForm fallback
        println("  $(field.name): $(field.value)")
    end
end

Zig

var doc = try pdf_oxide.Document.open("xfa-form.pdf");
if (doc.hasXfa()) {
    std.debug.print("This document contains an XFA form\n", .{});
    var fields = try doc.formFields(); // AcroForm fallback
    defer fields.deinit();
    const count = try fields.count();
    var i: i32 = 0;
    while (i < count) : (i += 1) {
        const name = try fields.getName(a, i);
        const value = try fields.getValue(a, i);
        std.debug.print("  {s}: {s}\n", .{ name, value });
    }
}

Objective-C

POXDocument *doc = [POXDocument openPath:@"xfa-form.pdf" error:&err];
if ([doc hasXfa]) {
    NSLog(@"This document contains an XFA form");
    for (POXFormField *field in [doc formFieldsWithError:&err]) { // AcroForm fallback
        NSLog(@"  %@: %@", field.name, field.value);
    }
}

Elixir

{:ok, doc} = PdfOxide.open("xfa-form.pdf")
if PdfOxide.has_xfa?(doc) do
  IO.puts("This document contains an XFA form")
  {:ok, fields} = PdfOxide.form_fields(doc) # AcroForm fallback
  Enum.each(fields, fn field -> IO.puts("  #{field.name}: #{field.value}") end)
end

Справочник по API

Python API

Метод	Описание
`doc.get_form_fields()`	Получить все поля формы в виде объектов `FormField`
`doc.get_form_field_value(name)`	Получить значение конкретного поля по имени
`doc.set_form_field_value(name, value)`	Установить значение поля формы
`doc.export_form_data(path, format="fdf")`	Экспортировать данные формы в файл FDF или XFDF
`doc.has_xfa()`	Проверить, содержит ли документ XFA-форму
`doc.flatten_forms()`	Уплостить все поля формы в содержимое страниц
`doc.flatten_forms_on_page(page)`	Уплостить поля формы на конкретной странице

Свойства Python FormField

Свойство	Тип	Описание
`name`	`str`	Имя поля
`field_type`	`str`	Тип поля (text, checkbox, radio, choice, signature)
`value`	`str \| bool \| None`	Текущее значение поля
`is_required`	`bool`	Является ли поле обязательным
`is_readonly`	`bool`	Является ли поле только для чтения
`max_length`	`int \| None`	Максимальная длина для текстовых полей

JavaScript API

Метод	Описание
`doc.getFormFields()`	Получить все поля формы
`doc.getFormFieldValue(name)`	Получить значение конкретного поля по имени
`doc.setFormFieldValue(name, value)`	Установить значение поля формы
`doc.exportFormData(format?)`	Экспортировать как FDF (по умолчанию) или XFDF, возвращает `Uint8Array`
`doc.hasXfa()`	Проверить, содержит ли документ XFA-форму
`doc.flattenForms()`	Уплостить все поля формы в содержимое страниц
`doc.flattenFormsOnPage(pageIndex)`	Уплостить поля формы на конкретной странице

Свойства JavaScript FormField

Свойство	Тип	Описание
`name`	`string`	Имя поля
`fieldType`	`string`	Тип поля
`value`	`string \| boolean \| null`	Текущее значение
`flags`	`number`	Флаги поля

Rust API

Функция	Описание
`FormExtractor::extract_fields(doc)`	Извлечь все поля формы из словаря AcroForm
`FormExtractor::export_fdf(doc, fields)`	Экспортировать как байты FDF
`FormExtractor::export_xfdf(doc, fields)`	Экспортировать как строку XFDF
`analyze_xfa_document(doc)`	Проанализировать структуру XFA-формы
`editor.get_form_fields()`	Получить поля через DocumentEditor
`editor.get_form_field_value(name)`	Получить значение поля по имени
`editor.set_form_field_value(name, value)`	Установить значение поля

Поля FormField (Rust)

Поле	Тип	Описание
`name`	`String`	Имя поля из ключа `/T`
`full_name`	`String`	Полностью квалифицированное имя (через точку)
`field_type`	`FieldType`	Button, Text, Choice, Signature, Unknown
`value`	`FieldValue`	Текущее значение поля
`tooltip`	`Option<String>`	Подсказка из ключа `/TU`
`bounds`	`Option<[f64; 4]>`	Ограничивающий прямоугольник `[x1, y1, x2, y2]`
`flags`	`Option<u32>`	Флаги поля (ReadOnly, Required, NoExport)
`default_value`	`Option<FieldValue>`	Значение по умолчанию из ключа `/DV`
`max_length`	`Option<u32>`	Максимальная длина для текстовых полей

Варианты FieldType

Вариант	Описание
`Button`	Флажок, переключатель или кнопка (`/Btn`)
`Text`	Однострочное или многострочное текстовое поле (`/Tx`)
`Choice`	Список или выпадающий список (`/Ch`)
`Signature`	Поле цифровой подписи (`/Sig`)
`Unknown(String)`	Нераспознанный тип поля

Варианты FieldValue

Вариант	Описание
`Text(String)`	Текстовое строковое значение
`Boolean(bool)`	Логическое значение (флажки)
`Name(String)`	Именованное значение (переключатели, поля выбора)
`Array(Vec<String>)`	Несколько значений (многострочные списки)
`None`	Значение отсутствует

Расширенный сценарий: проверка обязательных полей

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
fields = doc.get_form_fields()

missing = [f for f in fields if f.is_required and not f.value]
if missing:
    print("Missing required fields:")
    for f in missing:
        print(f"  - {f.name}")

Rust

use pdf_oxide::extractors::{FormExtractor, FieldValue};
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;

let required_empty: Vec<_> = fields.iter()
    .filter(|f| {
        f.flags.map_or(false, |flags| flags & 0x02 != 0)
            && matches!(f.value, FieldValue::None | FieldValue::Text(ref s) if s.is_empty())
    })
    .collect();

if !required_empty.is_empty() {
    println!("Missing required fields:");
    for f in &required_empty {
        println!("  - {}", f.full_name);
    }
}

C++

auto doc = pdf_oxide::Document::open("form.pdf");
for (const auto& field : doc.get_form_fields()) {
    if (field.required && field.value.empty()) {
        std::cout << "Missing required field: " << field.name << "\n";
    }
}

Swift

let doc = try Document.open("form.pdf")
for field in try doc.formFields() where field.required && field.value.isEmpty {
    print("Missing required field: \(field.name)")
}

Dart

final doc = PdfDocument.open('form.pdf');
for (final field in doc.getFormFields()) {
  if (field.required && field.value.isEmpty) {
    print('Missing required field: ${field.name}');
  }
}

doc <- pdf_open("form.pdf")
for (field in pdf_get_form_fields(doc)) {
  if (field$required && field$value == "") {
    cat(sprintf("Missing required field: %s\n", field$name))
  }
}

Julia

doc = open_document("form.pdf")
for field in get_form_fields(doc)
    if field.required && isempty(field.value)
        println("Missing required field: $(field.name)")
    end
end

Zig

var doc = try pdf_oxide.Document.open("form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
    const value = try fields.getValue(a, i);
    if (try fields.isRequired(i) and value.len == 0) {
        const name = try fields.getName(a, i);
        std.debug.print("Missing required field: {s}\n", .{name});
    }
}

Objective-C

POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
    if (field.required && field.value.length == 0) {
        NSLog(@"Missing required field: %@", field.name);
    }
}

Elixir

{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
fields
|> Enum.filter(fn f -> f.required and f.value in [nil, ""] end)
|> Enum.each(fn f -> IO.puts("Missing required field: #{f.name}") end)

Связанные страницы

Заполнение PDF-форм – Пошаговое руководство по заполнению форм
Извлечение аннотаций – Работа с аннотациями вместе с полями форм
Извлечение текста – Извлечение текстового содержимого страниц
Метаданные и XMP – Чтение свойств документа