Skip to content

Extracción de Datos de Formularios

PDF Oxide extrae campos de formularios interactivos (AcroForms) de documentos PDF, incluyendo campos de texto, casillas de verificación, botones de opción, campos de selección y firmas. Los datos extraídos pueden exportarse en formato FDF o XFDF para intercambio con otras aplicaciones. También se pueden analizar y convertir formularios XFA (XML Forms Architecture).

Ejemplo Rápido

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
fields = doc.get_form_fields()
for field in fields:
    print(f"{field.name} ({field.field_type}): {field.value}")

Node.js

const { PdfDocument } = require("pdf-oxide");

const doc = new PdfDocument("form.pdf");
const fields = doc.getFormFields();
for (const field of fields) {
  console.log(`${field.name} (${field.fieldType}): ${field.value}`);
}
doc.close();

Go

import pdfoxide "github.com/yfedoseev/pdf_oxide/go"

doc, _ := pdfoxide.Open("form.pdf")
defer doc.Close()
fields, _ := doc.FormFields()
for _, field := range fields {
    fmt.Printf("%s (%s): %s\n", field.Name, field.FieldType, field.Value)
}

C#

using PdfOxide.Core;

using var doc = PdfDocument.Open("form.pdf");
var fields = doc.GetFormFields();
foreach (var field in fields)
{
    Console.WriteLine($"{field.Name} ({field.FieldType}): {field.Value}");
}

WASM

const doc = new WasmPdfDocument(bytes);
const fields = doc.getFormFields();
for (const field of fields) {
    console.log(`${field.name} (${field.fieldType}): ${field.value}`);
}

Rust

use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
for field in &fields {
    println!("{} ({:?}): {:?}", field.full_name, field.field_type, field.value);
}

Java

import fyi.oxide.pdf.PdfDocument;
import fyi.oxide.pdf.form.FormField;

try (PdfDocument doc = PdfDocument.open("form.pdf")) {
    for (FormField field : doc.formFields()) {
        System.out.printf("%s (%s): %s%n",
            field.name(), field.type(), field.value().orElse(""));
    }
}

Ruby

require 'pdf_oxide'

PdfOxide::PdfDocument.open('form.pdf') do |doc|
  doc.form_fields.each do |field|
    puts "#{field[:name]} (#{field[:type]}): #{field[:value]}"
  end
end

C++

#include <pdf_oxide/pdf_oxide.hpp>

auto doc = pdf_oxide::Document::open("form.pdf");
for (const auto& field : doc.get_form_fields()) {
    std::cout << field.name << " (" << field.type << "): " << field.value << "\n";
}

Swift

import PdfOxide

let doc = try Document.open("form.pdf")
for field in try doc.formFields() {
    print("\(field.name) (\(field.type)): \(field.value)")
}

Kotlin

import fyi.oxide.pdf.PdfDocument

PdfDocument.open("form.pdf").use { doc ->
    doc.formFields().forEach { field ->
        println("${field.name()} (${field.type()}): ${field.valueOrNull() ?: ""}")
    }
}

Dart

import 'package:pdf_oxide/pdf_oxide.dart';

final doc = PdfDocument.open('form.pdf');
for (final field in doc.getFormFields()) {
  print('${field.name} (${field.type}): ${field.value}');
}

R

library(pdfoxide)

doc <- pdf_open("form.pdf")
for (field in pdf_get_form_fields(doc)) {
  cat(sprintf("%s (%s): %s\n", field$name, field$type, field$value))
}

Julia

using PdfOxide

doc = open_document("form.pdf")
for field in get_form_fields(doc)
    println("$(field.name) ($(field.type)): $(field.value)")
end

Zig

const pdf_oxide = @import("pdf_oxide");
const a = std.heap.page_allocator;

var doc = try pdf_oxide.Document.open("form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
    const name = try fields.getName(a, i);
    const ftype = try fields.getType(a, i);
    const value = try fields.getValue(a, i);
    std.debug.print("{s} ({s}): {s}\n", .{ name, ftype, value });
}

Scala

import fyi.oxide.pdf.{PdfDocument, valueOption}
import scala.util.Using

Using.resource(PdfDocument.open("form.pdf")) { doc =>
  doc.formFieldsSeq.foreach { field =>
    println(s"${field.name} (${field.`type`}): ${field.valueOption.getOrElse("")}")
  }
}

Clojure

(require '[pdf-oxide.core :as pdf])

(with-open [doc (pdf/open "form.pdf")]
  (doseq [field (pdf/form-fields doc)]
    (println (format "%s (%s): %s"
                     (.name field) (.type field) (.orElse (.value field) "")))))

Objective-C

#import "POXPdfOxide.h"
NSError *err = nil;

POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
    NSLog(@"%@ (%@): %@", field.name, field.type, field.value);
}

Elixir

{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
Enum.each(fields, fn field ->
  IO.puts("#{field.name} (#{field.type}): #{field.value}")
end)

Migración desde PyMuPDF get_form_fields()

Si estás migrando desde PyMuPDF, las APIs son similares en estructura, pero PDF Oxide devuelve datos más ricos y admite formularios XFA:

PyMuPDF:

import fitz

doc = fitz.open("form.pdf")
# Returns dict of {field_name: field_value} — loses type info
fields = doc.get_form_fields()

# Or iterate widgets for more detail
for page in doc:
    for widget in page.widgets():
        print(widget.field_name, widget.field_value)

PDF Oxide:

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
# Returns structured objects with name, value, type, options, rect
fields = doc.get_form_fields()
for field in fields:
    print(f"{field.name} ({field.field_type}): {field.value}")

# Also handles XFA forms that PyMuPDF cannot read
xfa = doc.has_xfa()

Diferencias clave:

  • PDF Oxide devuelve objetos de campo estructurados (no solo un diccionario)
  • Incluye tipo de campo, rectángulo delimitador y opciones para campos de selección
  • Admite formularios XFA — el get_form_fields() de PyMuPDF devuelve vacío para PDFs exclusivamente XFA
  • Exportación a FDF/XFDF para intercambio de datos de formularios entre sistemas

Para una guía de migración completa que cubre PyMuPDF, pypdf, pdfplumber y pdfminer, consulta Migrar a PDF Oxide.


Lectura de Campos de Formulario

Obtener Todos los Campos

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("tax-form.pdf")
fields = doc.get_form_fields()

for field in fields:
    print(f"Name: {field.name}")
    print(f"  Type: {field.field_type}")
    print(f"  Value: {field.value}")
    print(f"  Required: {field.is_required}")
    print(f"  Read-only: {field.is_readonly}")
    if field.max_length:
        print(f"  Max length: {field.max_length}")

Node.js

const doc = new PdfDocument("tax-form.pdf");
const fields = doc.getFormFields();

for (const field of fields) {
  console.log(`Name: ${field.name}`);
  console.log(`  Type: ${field.fieldType}`);
  console.log(`  Value: ${field.value}`);
}
doc.close();

Go

doc, _ := pdfoxide.Open("tax-form.pdf")
defer doc.Close()
fields, _ := doc.FormFields()

for _, field := range fields {
    fmt.Printf("Name: %s\n", field.Name)
    fmt.Printf("  Type: %s\n", field.FieldType)
    fmt.Printf("  Value: %s\n", field.Value)
}

C#

using var doc = PdfDocument.Open("tax-form.pdf");
var fields = doc.GetFormFields();

foreach (var field in fields)
{
    Console.WriteLine($"Name: {field.Name}");
    Console.WriteLine($"  Type: {field.FieldType}");
    Console.WriteLine($"  Value: {field.Value}");
}

WASM

const doc = new WasmPdfDocument(bytes);
const fields = doc.getFormFields();

for (const field of fields) {
    console.log(`Name: ${field.name}`);
    console.log(`  Type: ${field.fieldType}`);
    console.log(`  Value: ${field.value}`);
    console.log(`  Flags: ${field.flags}`);
}

Rust

use pdf_oxide::extractors::{FormExtractor, FieldType};
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("tax-form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;

for field in &fields {
    let type_str = match &field.field_type {
        FieldType::Button => "Button",
        FieldType::Text => "Text",
        FieldType::Choice => "Choice",
        FieldType::Signature => "Signature",
        FieldType::Unknown(s) => s.as_str(),
    };

    println!("[{}] {} = {:?}", type_str, field.full_name, field.value);

    if let Some(tooltip) = &field.tooltip {
        println!("  Tooltip: {}", tooltip);
    }
    if let Some(bounds) = &field.bounds {
        println!("  Bounds: [{:.1}, {:.1}, {:.1}, {:.1}]",
            bounds[0], bounds[1], bounds[2], bounds[3]);
    }
}

Java

import fyi.oxide.pdf.PdfDocument;
import fyi.oxide.pdf.form.FormField;

try (PdfDocument doc = PdfDocument.open("tax-form.pdf")) {
    for (FormField field : doc.formFields()) {
        System.out.println("Name: " + field.name());
        System.out.println("  Type: " + field.type());
        System.out.println("  Value: " + field.value().orElse(""));
    }
}

Ruby

PdfOxide::PdfDocument.open('tax-form.pdf') do |doc|
  doc.form_fields.each do |field|
    puts "Name: #{field[:name]}"
    puts "  Type: #{field[:type]}"
    puts "  Value: #{field[:value]}"
  end
end

C++

auto doc = pdf_oxide::Document::open("tax-form.pdf");
for (const auto& field : doc.get_form_fields()) {
    std::cout << "Name: " << field.name << "\n";
    std::cout << "  Type: " << field.type << "\n";
    std::cout << "  Value: " << field.value << "\n";
    std::cout << "  Read-only: " << field.readonly << "\n";
    std::cout << "  Required: " << field.required << "\n";
}

Swift

let doc = try Document.open("tax-form.pdf")
for field in try doc.formFields() {
    print("Name: \(field.name)")
    print("  Type: \(field.type)")
    print("  Value: \(field.value)")
    print("  Read-only: \(field.readonly)")
    print("  Required: \(field.required)")
}

Kotlin

PdfDocument.open("tax-form.pdf").use { doc ->
    doc.formFields().forEach { field ->
        println("Name: ${field.name()}")
        println("  Type: ${field.type()}")
        println("  Value: ${field.valueOrNull() ?: ""}")
    }
}

Dart

final doc = PdfDocument.open('tax-form.pdf');
for (final field in doc.getFormFields()) {
  print('Name: ${field.name}');
  print('  Type: ${field.type}');
  print('  Value: ${field.value}');
  print('  Read-only: ${field.readonly}');
  print('  Required: ${field.required}');
}

R

doc <- pdf_open("tax-form.pdf")
for (field in pdf_get_form_fields(doc)) {
  cat(sprintf("Name: %s\n", field$name))
  cat(sprintf("  Type: %s\n", field$type))
  cat(sprintf("  Value: %s\n", field$value))
  cat(sprintf("  Read-only: %s\n", field$readonly))
  cat(sprintf("  Required: %s\n", field$required))
}

Julia

doc = open_document("tax-form.pdf")
for field in get_form_fields(doc)
    println("Name: $(field.name)")
    println("  Type: $(field.type)")
    println("  Value: $(field.value)")
    println("  Read-only: $(field.readonly)")
    println("  Required: $(field.required)")
end

Zig

var doc = try pdf_oxide.Document.open("tax-form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
    const name = try fields.getName(a, i);
    const ftype = try fields.getType(a, i);
    const value = try fields.getValue(a, i);
    std.debug.print("Name: {s}\n  Type: {s}\n  Value: {s}\n", .{ name, ftype, value });
    std.debug.print("  Read-only: {}\n  Required: {}\n", .{
        try fields.isReadonly(i), try fields.isRequired(i),
    });
}

Scala

Using.resource(PdfDocument.open("tax-form.pdf")) { doc =>
  doc.formFieldsSeq.foreach { field =>
    println(s"Name: ${field.name}")
    println(s"  Type: ${field.`type`}")
    println(s"  Value: ${field.valueOption.getOrElse("")}")
  }
}

Clojure

(with-open [doc (pdf/open "tax-form.pdf")]
  (doseq [field (pdf/form-fields doc)]
    (println "Name:" (.name field))
    (println "  Type:" (.type field))
    (println "  Value:" (.orElse (.value field) ""))))

Objective-C

POXDocument *doc = [POXDocument openPath:@"tax-form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
    NSLog(@"Name: %@", field.name);
    NSLog(@"  Type: %@", field.type);
    NSLog(@"  Value: %@", field.value);
    NSLog(@"  Read-only: %d", field.readonly);
    NSLog(@"  Required: %d", field.required);
}

Elixir

{:ok, doc} = PdfOxide.open("tax-form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
Enum.each(fields, fn field ->
  IO.puts("Name: #{field.name}")
  IO.puts("  Type: #{field.type}")
  IO.puts("  Value: #{field.value}")
  IO.puts("  Read-only: #{field.read_only}")
  IO.puts("  Required: #{field.required}")
end)

Obtener el Valor de un Campo Específico

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

name = doc.get_form_field_value("employee_name")
ssn = doc.get_form_field_value("ssn")
agreed = doc.get_form_field_value("agree_to_terms")

print(f"Name: {name}")       # "John Doe"
print(f"SSN: {ssn}")         # "123-45-6789"
print(f"Agreed: {agreed}")   # True

WASM

const doc = new WasmPdfDocument(bytes);

const name = doc.getFormFieldValue("employee_name");
const ssn = doc.getFormFieldValue("ssn");
const agreed = doc.getFormFieldValue("agree_to_terms");

console.log(`Name: ${name}`);     // "John Doe"
console.log(`SSN: ${ssn}`);       // "123-45-6789"
console.log(`Agreed: ${agreed}`); // true

Rust

use pdf_oxide::editor::{DocumentEditor, EditableDocument};

let mut editor = DocumentEditor::open("form.pdf")?;

if let Some(value) = editor.get_form_field_value("employee_name")? {
    println!("Name: {:?}", value);
}

Rellenar Formularios

Establecer Valores de Campos

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

# Set text fields
doc.set_form_field_value("full_name", "Jane Doe")
doc.set_form_field_value("email", "jane@example.com")

# Set checkboxes
doc.set_form_field_value("agree_to_terms", True)

# Save the filled form
doc.save("filled_form.pdf")

WASM

const doc = new WasmPdfDocument(bytes);

// Set text fields
doc.setFormFieldValue("full_name", "Jane Doe");
doc.setFormFieldValue("email", "jane@example.com");

// Set checkboxes
doc.setFormFieldValue("agree_to_terms", true);

// Save the filled form
const filledBytes = doc.save();

Rust

use pdf_oxide::editor::{DocumentEditor, EditableDocument, FormFieldValue};

let mut editor = DocumentEditor::open("form.pdf")?;

// Set text fields
editor.set_form_field_value("full_name", FormFieldValue::Text("Jane Doe".into()))?;
editor.set_form_field_value("email", FormFieldValue::Text("jane@example.com".into()))?;

// Set checkboxes
editor.set_form_field_value("agree_to_terms", FormFieldValue::Boolean(true))?;

// Set choice fields
editor.set_form_field_value("state", FormFieldValue::Choice("California".into()))?;

editor.save("filled_form.pdf")?;

Java

import fyi.oxide.pdf.DocumentEditor;

try (DocumentEditor editor = DocumentEditor.open("form.pdf")) {
    // Set text fields
    editor.setFormField("full_name", "Jane Doe");
    editor.setFormField("email", "jane@example.com");

    // Set checkboxes
    editor.setFormField("agree_to_terms", true);

    editor.saveTo(java.nio.file.Path.of("filled_form.pdf"));
}

Ruby

PdfOxide::DocumentEditor.open('form.pdf') do |editor|
  # Set text fields
  editor.set_form_field('full_name', 'Jane Doe')
  editor.set_form_field('email', 'jane@example.com')

  # Set checkboxes
  editor.set_form_field('agree_to_terms', true)

  editor.save_to('filled_form.pdf')
end

C++

auto editor = pdf_oxide::DocumentEditor::open("form.pdf");

// Set text fields
editor.set_form_field_value("full_name", "Jane Doe");
editor.set_form_field_value("email", "jane@example.com");

// Set checkboxes
editor.set_form_field_value("agree_to_terms", "true");

editor.save("filled_form.pdf");

Swift

let editor = try DocumentEditor.openEditor("form.pdf")

// Set text fields
try editor.setFormFieldValue("full_name", "Jane Doe")
try editor.setFormFieldValue("email", "jane@example.com")

// Set checkboxes
try editor.setFormFieldValue("agree_to_terms", "true")

try editor.save("filled_form.pdf")

Kotlin

import fyi.oxide.pdf.DocumentEditor

DocumentEditor.open("form.pdf").use { editor ->
    // Set text fields
    editor.setFormField("full_name", "Jane Doe")
    editor.setFormField("email", "jane@example.com")

    // Set checkboxes
    editor.setFormField("agree_to_terms", true)

    editor.saveTo(java.nio.file.Path.of("filled_form.pdf"))
}

Dart

final editor = DocumentEditor.open('form.pdf');

// Set text fields
editor.setFormFieldValue('full_name', 'Jane Doe');
editor.setFormFieldValue('email', 'jane@example.com');

// Set checkboxes
editor.setFormFieldValue('agree_to_terms', 'true');

editor.save('filled_form.pdf');

R

editor <- pdf_editor_open("form.pdf")

# Set text fields
pdf_editor_set_form_field_value(editor, "full_name", "Jane Doe")
pdf_editor_set_form_field_value(editor, "email", "jane@example.com")

# Set checkboxes
pdf_editor_set_form_field_value(editor, "agree_to_terms", "true")

pdf_editor_save(editor, "filled_form.pdf")

Julia

editor = open_editor("form.pdf")

# Set text fields
set_form_field_value(editor, "full_name", "Jane Doe")
set_form_field_value(editor, "email", "jane@example.com")

# Set checkboxes
set_form_field_value(editor, "agree_to_terms", "true")

save(editor, "filled_form.pdf")

Zig

var editor = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor.deinit();

// Set text fields
try editor.setFormFieldValue("full_name", "Jane Doe");
try editor.setFormFieldValue("email", "jane@example.com");

// Set checkboxes
try editor.setFormFieldValue("agree_to_terms", "true");

try editor.save("filled_form.pdf");

Scala

import fyi.oxide.pdf.DocumentEditor
import scala.util.Using

Using.resource(DocumentEditor.open("form.pdf")) { editor =>
  // Set text fields
  editor.setFormField("full_name", "Jane Doe")
  editor.setFormField("email", "jane@example.com")

  // Set checkboxes
  editor.setFormField("agree_to_terms", true)

  editor.saveTo(java.nio.file.Path.of("filled_form.pdf"))
}

Clojure

(with-open [editor (pdf/editor "form.pdf")]
  ;; Set text fields
  (.setFormField editor "full_name" "Jane Doe")
  (.setFormField editor "email" "jane@example.com")

  ;; Set checkboxes
  (.setFormField editor "agree_to_terms" true)

  (.saveTo editor (java.nio.file.Path/of "filled_form.pdf" (make-array String 0))))

Objective-C

POXDocumentEditor *editor = [POXDocumentEditor openEditor:@"form.pdf" error:&err];

// Set text fields
[editor setFormField:@"full_name" value:@"Jane Doe" error:&err];
[editor setFormField:@"email" value:@"jane@example.com" error:&err];

// Set checkboxes
[editor setFormField:@"agree_to_terms" value:@"true" error:&err];

[editor saveToPath:@"filled_form.pdf" error:&err];

Elixir

{:ok, editor} = PdfOxide.open_editor("form.pdf")

# Set text fields
PdfOxide.set_form_field_value(editor, "full_name", "Jane Doe")
PdfOxide.set_form_field_value(editor, "email", "jane@example.com")

# Set checkboxes
PdfOxide.set_form_field_value(editor, "agree_to_terms", "true")

PdfOxide.editor_save(editor, "filled_form.pdf")

Exportar Datos del Formulario

Exporta los datos de los campos del formulario en FDF o XFDF para compartirlos con otras aplicaciones.

Exportación FDF

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
doc.export_form_data("form_data.fdf")

WASM

const doc = new WasmPdfDocument(bytes);
const fdfBytes = doc.exportFormData("fdf");
// fdfBytes is a Uint8Array

Rust

use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
let fdf_bytes = FormExtractor::export_fdf(&mut doc, fields)?;
std::fs::write("form_data.fdf", &fdf_bytes)?;

C++

auto doc = pdf_oxide::Document::open("form.pdf");
auto fdf = doc.export_form_data_to_bytes(0); // 0 = FDF
std::ofstream("form_data.fdf", std::ios::binary)
    .write(reinterpret_cast<const char*>(fdf.data()), fdf.size());

Swift

let doc = try Document.open("form.pdf")
let fdf = try doc.exportFormData(formatType: 0) // 0 = FDF
try Data(fdf).write(to: URL(fileURLWithPath: "form_data.fdf"))

Dart

final doc = PdfDocument.open('form.pdf');
final fdf = doc.exportFormDataToBytes(0); // 0 = FDF
File('form_data.fdf').writeAsBytesSync(fdf);

R

doc <- pdf_open("form.pdf")
fdf <- pdf_export_form_data_to_bytes(doc, format_type = 0L) # 0 = FDF
writeBin(fdf, "form_data.fdf")

Julia

doc = open_document("form.pdf")
fdf = export_form_data_to_bytes(doc, 0) # 0 = FDF
write("form_data.fdf", fdf)

Zig

var doc = try pdf_oxide.Document.open("form.pdf");
const fdf = try doc.exportFormDataToBytes(a, 0); // 0 = FDF
defer a.free(fdf);
try std.fs.cwd().writeFile(.{ .sub_path = "form_data.fdf", .data = fdf });

Objective-C

POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
NSData *fdf = [doc exportFormDataToBytes:0 error:&err]; // 0 = FDF
[fdf writeToFile:@"form_data.fdf" atomically:YES];

Elixir

{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fdf} = PdfOxide.export_form_data_to_bytes(doc, 0) # 0 = FDF
File.write!("form_data.fdf", fdf)

Exportación XFDF

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
doc.export_form_data("form_data.xfdf", format="xfdf")

WASM

const doc = new WasmPdfDocument(bytes);
const xfdfBytes = doc.exportFormData("xfdf");

Rust

use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
let xfdf = FormExtractor::export_xfdf(&mut doc, fields)?;
std::fs::write("form_data.xfdf", &xfdf)?;

C++

auto doc = pdf_oxide::Document::open("form.pdf");
auto xfdf = doc.export_form_data_to_bytes(1); // 1 = XFDF
std::ofstream("form_data.xfdf", std::ios::binary)
    .write(reinterpret_cast<const char*>(xfdf.data()), xfdf.size());

Swift

let doc = try Document.open("form.pdf")
let xfdf = try doc.exportFormData(formatType: 1) // 1 = XFDF
try Data(xfdf).write(to: URL(fileURLWithPath: "form_data.xfdf"))

Dart

final doc = PdfDocument.open('form.pdf');
final xfdf = doc.exportFormDataToBytes(1); // 1 = XFDF
File('form_data.xfdf').writeAsBytesSync(xfdf);

R

doc <- pdf_open("form.pdf")
xfdf <- pdf_export_form_data_to_bytes(doc, format_type = 1L) # 1 = XFDF
writeBin(xfdf, "form_data.xfdf")

Julia

doc = open_document("form.pdf")
xfdf = export_form_data_to_bytes(doc, 1) # 1 = XFDF
write("form_data.xfdf", xfdf)

Zig

var doc = try pdf_oxide.Document.open("form.pdf");
const xfdf = try doc.exportFormDataToBytes(a, 1); // 1 = XFDF
defer a.free(xfdf);
try std.fs.cwd().writeFile(.{ .sub_path = "form_data.xfdf", .data = xfdf });

Objective-C

POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
NSData *xfdf = [doc exportFormDataToBytes:1 error:&err]; // 1 = XFDF
[xfdf writeToFile:@"form_data.xfdf" atomically:YES];

Elixir

{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, xfdf} = PdfOxide.export_form_data_to_bytes(doc, 1) # 1 = XFDF
File.write!("form_data.xfdf", xfdf)

Sobre la importación de datos

La exportación es el lado de lectura del ciclo completo; la escritura —cargar datos FDF/XFDF de nuevo en un PDF— se cubre en la documentación de edición. El C ABI y el binding de Swift declaran pdf_editor_import_fdf_bytes, pdf_editor_import_xfdf_bytes, pdf_form_import_from_file y pdf_document_import_form_data, aunque en v0.3.69 devuelven el estado Unsupported. El enfoque portátil recomendado es analizar el FDF/XFDF manualmente y llamar a set_form_field_value. Para ejemplos completos y ejecutables de importación, consulta Edición de Campos de Formulario → Importar Datos de Formulario.


Campos de Formulario en Markdown/HTML

Los valores de los campos de formulario se incluyen de forma predeterminada en la conversión a Markdown y HTML. Usa include_form_fields para controlar este comportamiento.

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

# Include form field values (default)
md = doc.to_markdown(0, include_form_fields=True)

# Exclude form fields
md = doc.to_markdown(0, include_form_fields=False)

WASM

const doc = new WasmPdfDocument(bytes);

// Include form fields (default: true)
const md = doc.toMarkdown(0, true, true, true);

// Exclude form fields (4th parameter)
const md2 = doc.toMarkdown(0, true, true, false);

Rust

use pdf_oxide::PdfDocument;
use pdf_oxide::converters::ConversionOptions;

let doc = PdfDocument::open("form.pdf")?;
let options = ConversionOptions {
    include_form_fields: true,
    ..Default::default()
};
let md = doc.to_markdown(0, &options)?;

Aplanar Formularios

Convierte los campos de formulario en contenido estático de la página, haciéndolos no editables. Útil para generar PDFs finalizados.

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

# Flatten all form fields
doc.flatten_forms()
doc.save("flattened.pdf")

# Or flatten a single page
doc2 = PdfDocument("form.pdf")
doc2.flatten_forms_on_page(0)
doc2.save("flattened_page0.pdf")

WASM

const doc = new WasmPdfDocument(bytes);

// Flatten all form fields
doc.flattenForms();
const flattened = doc.save();

// Or flatten a single page
const doc2 = new WasmPdfDocument(bytes);
doc2.flattenFormsOnPage(0);
const flattened2 = doc2.save();

Rust

use pdf_oxide::Pdf;

let mut pdf = Pdf::open("form.pdf")?;

// Mark a specific page for flattening
pdf.flatten_page_annotations(0);
pdf.save("flattened.pdf")?;

// Or flatten all pages
let mut pdf2 = Pdf::open("form.pdf")?;
pdf2.flatten_all_annotations();
pdf2.save("flattened_all.pdf")?;

C++

auto editor = pdf_oxide::DocumentEditor::open("form.pdf");

// Flatten all form fields
editor.flatten_forms();
editor.save("flattened.pdf");

// Or flatten a single page
auto editor2 = pdf_oxide::DocumentEditor::open("form.pdf");
editor2.flatten_forms_on_page(0);
editor2.save("flattened_page0.pdf");

Swift

let editor = try DocumentEditor.openEditor("form.pdf")

// Flatten all form fields
try editor.flattenForms()
try editor.save("flattened.pdf")

// Or flatten a single page
let editor2 = try DocumentEditor.openEditor("form.pdf")
try editor2.flattenFormsOnPage(0)
try editor2.save("flattened_page0.pdf")

Dart

final editor = DocumentEditor.open('form.pdf');

// Flatten all form fields
editor.flattenForms();
editor.save('flattened.pdf');

// Or flatten a single page
final editor2 = DocumentEditor.open('form.pdf');
editor2.flattenFormsOnPage(0);
editor2.save('flattened_page0.pdf');

R

editor <- pdf_editor_open("form.pdf")

# Flatten all form fields
pdf_editor_flatten_forms(editor)
pdf_editor_save(editor, "flattened.pdf")

# Or flatten a single page
editor2 <- pdf_editor_open("form.pdf")
pdf_editor_flatten_forms_on_page(editor2, 0)
pdf_editor_save(editor2, "flattened_page0.pdf")

Julia

editor = open_editor("form.pdf")

# Flatten all form fields
flatten_forms(editor)
save(editor, "flattened.pdf")

# Or flatten a single page
editor2 = open_editor("form.pdf")
flatten_forms_on_page(editor2, 0)
save(editor2, "flattened_page0.pdf")

Zig

var editor = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor.deinit();

// Flatten all form fields
try editor.flattenForms();
try editor.save("flattened.pdf");

// Or flatten a single page
var editor2 = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor2.deinit();
try editor2.flattenFormsOnPage(0);
try editor2.save("flattened_page0.pdf");

Objective-C

POXDocumentEditor *editor = [POXDocumentEditor openEditor:@"form.pdf" error:&err];

// Flatten all form fields
[editor flattenForms:&err];
[editor saveToPath:@"flattened.pdf" error:&err];

// Or flatten a single page
POXDocumentEditor *editor2 = [POXDocumentEditor openEditor:@"form.pdf" error:&err];
[editor2 flattenFormsOnPage:0 error:&err];
[editor2 saveToPath:@"flattened_page0.pdf" error:&err];

Elixir

{:ok, editor} = PdfOxide.open_editor("form.pdf")

# Flatten all form fields
PdfOxide.flatten_forms(editor)
PdfOxide.editor_save(editor, "flattened.pdf")

# Or flatten a single page
{:ok, editor2} = PdfOxide.open_editor("form.pdf")
PdfOxide.flatten_forms_on_page(editor2, 0)
PdfOxide.editor_save(editor2, "flattened_page0.pdf")

Formularios XFA

Analiza el contenido de formularios XFA (XML Forms Architecture). Los formularios XFA utilizan plantillas basadas en XML en lugar de campos AcroForm y son comunes en formularios gubernamentales y empresariales.

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("xfa-form.pdf")
if doc.has_xfa():
    print("This document contains an XFA form")
    fields = doc.get_form_fields()  # Extracts AcroForm fallback fields
    for field in fields:
        print(f"  {field.name}: {field.value}")

Node.js

const doc = new PdfDocument("xfa-form.pdf");
if (doc.hasXFA()) {
  console.log("This document contains an XFA form");
  const fields = doc.getFormFields();
  for (const field of fields) {
    console.log(`  ${field.name}: ${field.value}`);
  }
}
doc.close();

Go

doc, _ := pdfoxide.Open("xfa-form.pdf")
defer doc.Close()
if doc.HasXfa() {
    fmt.Println("This document contains an XFA form")
    fields, _ := doc.FormFields()
    for _, field := range fields {
        fmt.Printf("  %s: %s\n", field.Name, field.Value)
    }
}

C#

using var doc = PdfDocument.Open("xfa-form.pdf");
if (doc.HasXfa)
{
    Console.WriteLine("This document contains an XFA form");
    var fields = doc.GetFormFields();
    foreach (var field in fields)
    {
        Console.WriteLine($"  {field.Name}: {field.Value}");
    }
}

WASM

const doc = new WasmPdfDocument(bytes);
if (doc.hasXfa()) {
    console.log("This document contains an XFA form");
    const fields = doc.getFormFields(); // AcroForm fallback fields
    for (const field of fields) {
        console.log(`  ${field.name}: ${field.value}`);
    }
}

Rust

use pdf_oxide::xfa::analyze_xfa_document;
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("xfa-form.pdf")?;
let analysis = analyze_xfa_document(&mut doc)?;
println!("XFA form detected: {} fields", analysis.fields.len());
for field in &analysis.fields {
    println!("  {} ({:?})", field.name, field.field_type);
}

C++

auto doc = pdf_oxide::Document::open("xfa-form.pdf");
if (doc.has_xfa()) {
    std::cout << "This document contains an XFA form\n";
    for (const auto& field : doc.get_form_fields()) { // AcroForm fallback
        std::cout << "  " << field.name << ": " << field.value << "\n";
    }
}

Swift

let doc = try Document.open("xfa-form.pdf")
if try doc.hasXfa() {
    print("This document contains an XFA form")
    for field in try doc.formFields() { // AcroForm fallback
        print("  \(field.name): \(field.value)")
    }
}

Dart

final doc = PdfDocument.open('xfa-form.pdf');
if (doc.hasXfa()) {
  print('This document contains an XFA form');
  for (final field in doc.getFormFields()) { // AcroForm fallback
    print('  ${field.name}: ${field.value}');
  }
}

R

doc <- pdf_open("xfa-form.pdf")
if (pdf_has_xfa(doc)) {
  cat("This document contains an XFA form\n")
  for (field in pdf_get_form_fields(doc)) { # AcroForm fallback
    cat(sprintf("  %s: %s\n", field$name, field$value))
  }
}

Julia

doc = open_document("xfa-form.pdf")
if has_xfa(doc)
    println("This document contains an XFA form")
    for field in get_form_fields(doc) # AcroForm fallback
        println("  $(field.name): $(field.value)")
    end
end

Zig

var doc = try pdf_oxide.Document.open("xfa-form.pdf");
if (doc.hasXfa()) {
    std.debug.print("This document contains an XFA form\n", .{});
    var fields = try doc.formFields(); // AcroForm fallback
    defer fields.deinit();
    const count = try fields.count();
    var i: i32 = 0;
    while (i < count) : (i += 1) {
        const name = try fields.getName(a, i);
        const value = try fields.getValue(a, i);
        std.debug.print("  {s}: {s}\n", .{ name, value });
    }
}

Objective-C

POXDocument *doc = [POXDocument openPath:@"xfa-form.pdf" error:&err];
if ([doc hasXfa]) {
    NSLog(@"This document contains an XFA form");
    for (POXFormField *field in [doc formFieldsWithError:&err]) { // AcroForm fallback
        NSLog(@"  %@: %@", field.name, field.value);
    }
}

Elixir

{:ok, doc} = PdfOxide.open("xfa-form.pdf")
if PdfOxide.has_xfa?(doc) do
  IO.puts("This document contains an XFA form")
  {:ok, fields} = PdfOxide.form_fields(doc) # AcroForm fallback
  Enum.each(fields, fn field -> IO.puts("  #{field.name}: #{field.value}") end)
end

Referencia de API

Python API

Método Descripción
doc.get_form_fields() Obtener todos los campos como objetos FormField
doc.get_form_field_value(name) Obtener el valor de un campo específico por nombre
doc.set_form_field_value(name, value) Establecer el valor de un campo de formulario
doc.export_form_data(path, format="fdf") Exportar datos a archivo FDF o XFDF
doc.has_xfa() Comprobar si el documento contiene un formulario XFA
doc.flatten_forms() Aplanar todos los campos de formulario en el contenido de la página
doc.flatten_forms_on_page(page) Aplanar campos en una página específica

Propiedades Python FormField

Propiedad Tipo Descripción
name str Nombre del campo
field_type str Tipo de campo (text, checkbox, radio, choice, signature)
value str | bool | None Valor actual del campo
is_required bool Si el campo es obligatorio
is_readonly bool Si el campo es de solo lectura
max_length int | None Longitud máxima para campos de texto

JavaScript API

Método Descripción
doc.getFormFields() Obtener todos los campos de formulario
doc.getFormFieldValue(name) Obtener el valor de un campo específico por nombre
doc.setFormFieldValue(name, value) Establecer el valor de un campo de formulario
doc.exportFormData(format?) Exportar como FDF (predeterminado) o XFDF, devuelve Uint8Array
doc.hasXfa() Comprobar si el documento contiene un formulario XFA
doc.flattenForms() Aplanar todos los campos de formulario en el contenido de la página
doc.flattenFormsOnPage(pageIndex) Aplanar campos en una página específica

Propiedades JavaScript FormField

Propiedad Tipo Descripción
name string Nombre del campo
fieldType string Tipo de campo
value string | boolean | null Valor actual
flags number Flags del campo

Rust API

Función Descripción
FormExtractor::extract_fields(doc) Extraer todos los campos del diccionario AcroForm
FormExtractor::export_fdf(doc, fields) Exportar como bytes FDF
FormExtractor::export_xfdf(doc, fields) Exportar como cadena XFDF
analyze_xfa_document(doc) Analizar la estructura del formulario XFA
editor.get_form_fields() Obtener campos a través de DocumentEditor
editor.get_form_field_value(name) Obtener valor del campo por nombre
editor.set_form_field_value(name, value) Establecer valor del campo

Campos FormField (Rust)

Campo Tipo Descripción
name String Nombre del campo de la clave /T
full_name String Nombre completamente calificado (separado por punto)
field_type FieldType Button, Text, Choice, Signature, Unknown
value FieldValue Valor actual del campo
tooltip Option<String> Información emergente de la clave /TU
bounds Option<[f64; 4]> Caja delimitadora [x1, y1, x2, y2]
flags Option<u32> Flags del campo (ReadOnly, Required, NoExport)
default_value Option<FieldValue> Valor predeterminado de la clave /DV
max_length Option<u32> Longitud máxima para campos de texto

Variantes FieldType

Variante Descripción
Button Casilla de verificación, botón de opción o botón de acción (/Btn)
Text Campo de texto de una o varias líneas (/Tx)
Choice Cuadro de lista o cuadro combinado (/Ch)
Signature Campo de firma digital (/Sig)
Unknown(String) Tipo de campo no reconocido

Variantes FieldValue

Variante Descripción
Text(String) Valor de cadena de texto
Boolean(bool) Valor booleano (casillas de verificación)
Name(String) Valor de nombre (botones de opción, campos de selección)
Array(Vec<String>) Múltiples valores (listas de selección múltiple)
None Sin valor presente

Avanzado: Verificar Campos Obligatorios

Python

from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")
fields = doc.get_form_fields()

missing = [f for f in fields if f.is_required and not f.value]
if missing:
    print("Missing required fields:")
    for f in missing:
        print(f"  - {f.name}")

Rust

use pdf_oxide::extractors::{FormExtractor, FieldValue};
use pdf_oxide::PdfDocument;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;

let required_empty: Vec<_> = fields.iter()
    .filter(|f| {
        f.flags.map_or(false, |flags| flags & 0x02 != 0)
            && matches!(f.value, FieldValue::None | FieldValue::Text(ref s) if s.is_empty())
    })
    .collect();

if !required_empty.is_empty() {
    println!("Missing required fields:");
    for f in &required_empty {
        println!("  - {}", f.full_name);
    }
}

C++

auto doc = pdf_oxide::Document::open("form.pdf");
for (const auto& field : doc.get_form_fields()) {
    if (field.required && field.value.empty()) {
        std::cout << "Missing required field: " << field.name << "\n";
    }
}

Swift

let doc = try Document.open("form.pdf")
for field in try doc.formFields() where field.required && field.value.isEmpty {
    print("Missing required field: \(field.name)")
}

Dart

final doc = PdfDocument.open('form.pdf');
for (final field in doc.getFormFields()) {
  if (field.required && field.value.isEmpty) {
    print('Missing required field: ${field.name}');
  }
}

R

doc <- pdf_open("form.pdf")
for (field in pdf_get_form_fields(doc)) {
  if (field$required && field$value == "") {
    cat(sprintf("Missing required field: %s\n", field$name))
  }
}

Julia

doc = open_document("form.pdf")
for field in get_form_fields(doc)
    if field.required && isempty(field.value)
        println("Missing required field: $(field.name)")
    end
end

Zig

var doc = try pdf_oxide.Document.open("form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
    const value = try fields.getValue(a, i);
    if (try fields.isRequired(i) and value.len == 0) {
        const name = try fields.getName(a, i);
        std.debug.print("Missing required field: {s}\n", .{name});
    }
}

Objective-C

POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
    if (field.required && field.value.length == 0) {
        NSLog(@"Missing required field: %@", field.name);
    }
}

Elixir

{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
fields
|> Enum.filter(fn f -> f.required and f.value in [nil, ""] end)
|> Enum.each(fn f -> IO.puts("Missing required field: #{f.name}") end)

Páginas Relacionadas