Form Data Extraction
PDF Oxide extracts interactive form fields (AcroForms) from PDF documents, including text fields, checkboxes, radio buttons, choice fields, and signatures. Extracted form data can be exported to FDF or XFDF format for interchange. XFA forms (XML Forms Architecture) can be analyzed and converted as well.
Quick Example
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
fields = doc.get_form_fields()
for field in fields:
print(f"{field.name} ({field.field_type}): {field.value}")
Node.js
const { PdfDocument } = require("pdf-oxide");
const doc = new PdfDocument("form.pdf");
const fields = doc.getFormFields();
for (const field of fields) {
console.log(`${field.name} (${field.fieldType}): ${field.value}`);
}
doc.close();
Go
import pdfoxide "github.com/yfedoseev/pdf_oxide/go"
doc, _ := pdfoxide.Open("form.pdf")
defer doc.Close()
fields, _ := doc.FormFields()
for _, field := range fields {
fmt.Printf("%s (%s): %s\n", field.Name, field.FieldType, field.Value)
}
C#
using PdfOxide.Core;
using var doc = PdfDocument.Open("form.pdf");
var fields = doc.GetFormFields();
foreach (var field in fields)
{
Console.WriteLine($"{field.Name} ({field.FieldType}): {field.Value}");
}
WASM
const doc = new WasmPdfDocument(bytes);
const fields = doc.getFormFields();
for (const field of fields) {
console.log(`${field.name} (${field.fieldType}): ${field.value}`);
}
Rust
use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;
let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
for field in &fields {
println!("{} ({:?}): {:?}", field.full_name, field.field_type, field.value);
}
Java
import fyi.oxide.pdf.PdfDocument;
import fyi.oxide.pdf.form.FormField;
try (PdfDocument doc = PdfDocument.open("form.pdf")) {
for (FormField field : doc.formFields()) {
System.out.printf("%s (%s): %s%n",
field.name(), field.type(), field.value().orElse(""));
}
}
Ruby
require 'pdf_oxide'
PdfOxide::PdfDocument.open('form.pdf') do |doc|
doc.form_fields.each do |field|
puts "#{field[:name]} (#{field[:type]}): #{field[:value]}"
end
end
C++
#include <pdf_oxide/pdf_oxide.hpp>
auto doc = pdf_oxide::Document::open("form.pdf");
for (const auto& field : doc.get_form_fields()) {
std::cout << field.name << " (" << field.type << "): " << field.value << "\n";
}
Swift
import PdfOxide
let doc = try Document.open("form.pdf")
for field in try doc.formFields() {
print("\(field.name) (\(field.type)): \(field.value)")
}
Kotlin
import fyi.oxide.pdf.PdfDocument
PdfDocument.open("form.pdf").use { doc ->
doc.formFields().forEach { field ->
println("${field.name()} (${field.type()}): ${field.valueOrNull() ?: ""}")
}
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
final doc = PdfDocument.open('form.pdf');
for (final field in doc.getFormFields()) {
print('${field.name} (${field.type}): ${field.value}');
}
R
library(pdfoxide)
doc <- pdf_open("form.pdf")
for (field in pdf_get_form_fields(doc)) {
cat(sprintf("%s (%s): %s\n", field$name, field$type, field$value))
}
Julia
using PdfOxide
doc = open_document("form.pdf")
for field in get_form_fields(doc)
println("$(field.name) ($(field.type)): $(field.value)")
end
Zig
const pdf_oxide = @import("pdf_oxide");
const a = std.heap.page_allocator;
var doc = try pdf_oxide.Document.open("form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
const name = try fields.getName(a, i);
const ftype = try fields.getType(a, i);
const value = try fields.getValue(a, i);
std.debug.print("{s} ({s}): {s}\n", .{ name, ftype, value });
}
Scala
import fyi.oxide.pdf.{PdfDocument, valueOption}
import scala.util.Using
Using.resource(PdfDocument.open("form.pdf")) { doc =>
doc.formFieldsSeq.foreach { field =>
println(s"${field.name} (${field.`type`}): ${field.valueOption.getOrElse("")}")
}
}
Clojure
(require '[pdf-oxide.core :as pdf])
(with-open [doc (pdf/open "form.pdf")]
(doseq [field (pdf/form-fields doc)]
(println (format "%s (%s): %s"
(.name field) (.type field) (.orElse (.value field) "")))))
Objective-C
#import "POXPdfOxide.h"
NSError *err = nil;
POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
NSLog(@"%@ (%@): %@", field.name, field.type, field.value);
}
Elixir
{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
Enum.each(fields, fn field ->
IO.puts("#{field.name} (#{field.type}): #{field.value}")
end)
Migrating from PyMuPDF get_form_fields()
If you’re switching from PyMuPDF, the API is similar but PDF Oxide returns richer data and handles XFA forms:
PyMuPDF:
import fitz
doc = fitz.open("form.pdf")
# Returns dict of {field_name: field_value} — loses type info
fields = doc.get_form_fields()
# Or iterate widgets for more detail
for page in doc:
for widget in page.widgets():
print(widget.field_name, widget.field_value)
PDF Oxide:
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
# Returns structured objects with name, value, type, options, rect
fields = doc.get_form_fields()
for field in fields:
print(f"{field.name} ({field.field_type}): {field.value}")
# Also handles XFA forms that PyMuPDF cannot read
xfa = doc.has_xfa()
Key differences:
- PDF Oxide returns structured field objects (not just a dict)
- Includes field type, bounding rect, and options for choice fields
- Supports XFA forms — PyMuPDF’s `get_form_fields()` returns empty for XFA-only PDFs
- Export to FDF/XFDF format for form data interchange
For a complete migration guide covering PyMuPDF, pypdf, pdfplumber, and pdfminer, see Migrate to PDF Oxide.
Reading Form Fields
Get All Fields
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("tax-form.pdf")
fields = doc.get_form_fields()
for field in fields:
print(f"Name: {field.name}")
print(f" Type: {field.field_type}")
print(f" Value: {field.value}")
print(f" Required: {field.is_required}")
print(f" Read-only: {field.is_readonly}")
if field.max_length:
print(f" Max length: {field.max_length}")
Node.js
const doc = new PdfDocument("tax-form.pdf");
const fields = doc.getFormFields();
for (const field of fields) {
console.log(`Name: ${field.name}`);
console.log(` Type: ${field.fieldType}`);
console.log(` Value: ${field.value}`);
}
doc.close();
Go
doc, _ := pdfoxide.Open("tax-form.pdf")
defer doc.Close()
fields, _ := doc.FormFields()
for _, field := range fields {
fmt.Printf("Name: %s\n", field.Name)
fmt.Printf(" Type: %s\n", field.FieldType)
fmt.Printf(" Value: %s\n", field.Value)
}
C#
using var doc = PdfDocument.Open("tax-form.pdf");
var fields = doc.GetFormFields();
foreach (var field in fields)
{
Console.WriteLine($"Name: {field.Name}");
Console.WriteLine($" Type: {field.FieldType}");
Console.WriteLine($" Value: {field.Value}");
}
WASM
const doc = new WasmPdfDocument(bytes);
const fields = doc.getFormFields();
for (const field of fields) {
console.log(`Name: ${field.name}`);
console.log(` Type: ${field.fieldType}`);
console.log(` Value: ${field.value}`);
console.log(` Flags: ${field.flags}`);
}
Rust
use pdf_oxide::extractors::{FormExtractor, FieldType};
use pdf_oxide::PdfDocument;
let mut doc = PdfDocument::open("tax-form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
for field in &fields {
let type_str = match &field.field_type {
FieldType::Button => "Button",
FieldType::Text => "Text",
FieldType::Choice => "Choice",
FieldType::Signature => "Signature",
FieldType::Unknown(s) => s.as_str(),
};
println!("[{}] {} = {:?}", type_str, field.full_name, field.value);
if let Some(tooltip) = &field.tooltip {
println!(" Tooltip: {}", tooltip);
}
if let Some(bounds) = &field.bounds {
println!(" Bounds: [{:.1}, {:.1}, {:.1}, {:.1}]",
bounds[0], bounds[1], bounds[2], bounds[3]);
}
}
Java
import fyi.oxide.pdf.PdfDocument;
import fyi.oxide.pdf.form.FormField;
try (PdfDocument doc = PdfDocument.open("tax-form.pdf")) {
for (FormField field : doc.formFields()) {
System.out.println("Name: " + field.name());
System.out.println(" Type: " + field.type());
System.out.println(" Value: " + field.value().orElse(""));
}
}
Ruby
PdfOxide::PdfDocument.open('tax-form.pdf') do |doc|
doc.form_fields.each do |field|
puts "Name: #{field[:name]}"
puts " Type: #{field[:type]}"
puts " Value: #{field[:value]}"
end
end
C++
auto doc = pdf_oxide::Document::open("tax-form.pdf");
for (const auto& field : doc.get_form_fields()) {
std::cout << "Name: " << field.name << "\n";
std::cout << " Type: " << field.type << "\n";
std::cout << " Value: " << field.value << "\n";
std::cout << " Read-only: " << field.readonly << "\n";
std::cout << " Required: " << field.required << "\n";
}
Swift
let doc = try Document.open("tax-form.pdf")
for field in try doc.formFields() {
print("Name: \(field.name)")
print(" Type: \(field.type)")
print(" Value: \(field.value)")
print(" Read-only: \(field.readonly)")
print(" Required: \(field.required)")
}
Kotlin
PdfDocument.open("tax-form.pdf").use { doc ->
doc.formFields().forEach { field ->
println("Name: ${field.name()}")
println(" Type: ${field.type()}")
println(" Value: ${field.valueOrNull() ?: ""}")
}
}
Dart
final doc = PdfDocument.open('tax-form.pdf');
for (final field in doc.getFormFields()) {
print('Name: ${field.name}');
print(' Type: ${field.type}');
print(' Value: ${field.value}');
print(' Read-only: ${field.readonly}');
print(' Required: ${field.required}');
}
R
doc <- pdf_open("tax-form.pdf")
for (field in pdf_get_form_fields(doc)) {
cat(sprintf("Name: %s\n", field$name))
cat(sprintf(" Type: %s\n", field$type))
cat(sprintf(" Value: %s\n", field$value))
cat(sprintf(" Read-only: %s\n", field$readonly))
cat(sprintf(" Required: %s\n", field$required))
}
Julia
doc = open_document("tax-form.pdf")
for field in get_form_fields(doc)
println("Name: $(field.name)")
println(" Type: $(field.type)")
println(" Value: $(field.value)")
println(" Read-only: $(field.readonly)")
println(" Required: $(field.required)")
end
Zig
var doc = try pdf_oxide.Document.open("tax-form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
const name = try fields.getName(a, i);
const ftype = try fields.getType(a, i);
const value = try fields.getValue(a, i);
std.debug.print("Name: {s}\n Type: {s}\n Value: {s}\n", .{ name, ftype, value });
std.debug.print(" Read-only: {}\n Required: {}\n", .{
try fields.isReadonly(i), try fields.isRequired(i),
});
}
Scala
Using.resource(PdfDocument.open("tax-form.pdf")) { doc =>
doc.formFieldsSeq.foreach { field =>
println(s"Name: ${field.name}")
println(s" Type: ${field.`type`}")
println(s" Value: ${field.valueOption.getOrElse("")}")
}
}
Clojure
(with-open [doc (pdf/open "tax-form.pdf")]
(doseq [field (pdf/form-fields doc)]
(println "Name:" (.name field))
(println " Type:" (.type field))
(println " Value:" (.orElse (.value field) ""))))
Objective-C
POXDocument *doc = [POXDocument openPath:@"tax-form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
NSLog(@"Name: %@", field.name);
NSLog(@" Type: %@", field.type);
NSLog(@" Value: %@", field.value);
NSLog(@" Read-only: %d", field.readonly);
NSLog(@" Required: %d", field.required);
}
Elixir
{:ok, doc} = PdfOxide.open("tax-form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
Enum.each(fields, fn field ->
IO.puts("Name: #{field.name}")
IO.puts(" Type: #{field.type}")
IO.puts(" Value: #{field.value}")
IO.puts(" Read-only: #{field.read_only}")
IO.puts(" Required: #{field.required}")
end)
Get a Specific Field Value
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
name = doc.get_form_field_value("employee_name")
ssn = doc.get_form_field_value("ssn")
agreed = doc.get_form_field_value("agree_to_terms")
print(f"Name: {name}") # "John Doe"
print(f"SSN: {ssn}") # "123-45-6789"
print(f"Agreed: {agreed}") # True
WASM
const doc = new WasmPdfDocument(bytes);
const name = doc.getFormFieldValue("employee_name");
const ssn = doc.getFormFieldValue("ssn");
const agreed = doc.getFormFieldValue("agree_to_terms");
console.log(`Name: ${name}`); // "John Doe"
console.log(`SSN: ${ssn}`); // "123-45-6789"
console.log(`Agreed: ${agreed}`); // true
Rust
use pdf_oxide::editor::{DocumentEditor, EditableDocument};
let mut editor = DocumentEditor::open("form.pdf")?;
if let Some(value) = editor.get_form_field_value("employee_name")? {
println!("Name: {:?}", value);
}
Filling Forms
Set Field Values
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
# Set text fields
doc.set_form_field_value("full_name", "Jane Doe")
doc.set_form_field_value("email", "jane@example.com")
# Set checkboxes
doc.set_form_field_value("agree_to_terms", True)
# Save the filled form
doc.save("filled_form.pdf")
WASM
const doc = new WasmPdfDocument(bytes);
// Set text fields
doc.setFormFieldValue("full_name", "Jane Doe");
doc.setFormFieldValue("email", "jane@example.com");
// Set checkboxes
doc.setFormFieldValue("agree_to_terms", true);
// Save the filled form
const filledBytes = doc.save();
Rust
use pdf_oxide::editor::{DocumentEditor, EditableDocument, FormFieldValue};
let mut editor = DocumentEditor::open("form.pdf")?;
// Set text fields
editor.set_form_field_value("full_name", FormFieldValue::Text("Jane Doe".into()))?;
editor.set_form_field_value("email", FormFieldValue::Text("jane@example.com".into()))?;
// Set checkboxes
editor.set_form_field_value("agree_to_terms", FormFieldValue::Boolean(true))?;
// Set choice fields
editor.set_form_field_value("state", FormFieldValue::Choice("California".into()))?;
editor.save("filled_form.pdf")?;
Java
import fyi.oxide.pdf.DocumentEditor;
try (DocumentEditor editor = DocumentEditor.open("form.pdf")) {
// Set text fields
editor.setFormField("full_name", "Jane Doe");
editor.setFormField("email", "jane@example.com");
// Set checkboxes
editor.setFormField("agree_to_terms", true);
editor.saveTo(java.nio.file.Path.of("filled_form.pdf"));
}
Ruby
PdfOxide::DocumentEditor.open('form.pdf') do |editor|
# Set text fields
editor.set_form_field('full_name', 'Jane Doe')
editor.set_form_field('email', 'jane@example.com')
# Set checkboxes
editor.set_form_field('agree_to_terms', true)
editor.save_to('filled_form.pdf')
end
C++
auto editor = pdf_oxide::DocumentEditor::open("form.pdf");
// Set text fields
editor.set_form_field_value("full_name", "Jane Doe");
editor.set_form_field_value("email", "jane@example.com");
// Set checkboxes
editor.set_form_field_value("agree_to_terms", "true");
editor.save("filled_form.pdf");
Swift
let editor = try DocumentEditor.openEditor("form.pdf")
// Set text fields
try editor.setFormFieldValue("full_name", "Jane Doe")
try editor.setFormFieldValue("email", "jane@example.com")
// Set checkboxes
try editor.setFormFieldValue("agree_to_terms", "true")
try editor.save("filled_form.pdf")
Kotlin
import fyi.oxide.pdf.DocumentEditor
DocumentEditor.open("form.pdf").use { editor ->
// Set text fields
editor.setFormField("full_name", "Jane Doe")
editor.setFormField("email", "jane@example.com")
// Set checkboxes
editor.setFormField("agree_to_terms", true)
editor.saveTo(java.nio.file.Path.of("filled_form.pdf"))
}
Dart
final editor = DocumentEditor.open('form.pdf');
// Set text fields
editor.setFormFieldValue('full_name', 'Jane Doe');
editor.setFormFieldValue('email', 'jane@example.com');
// Set checkboxes
editor.setFormFieldValue('agree_to_terms', 'true');
editor.save('filled_form.pdf');
R
editor <- pdf_editor_open("form.pdf")
# Set text fields
pdf_editor_set_form_field_value(editor, "full_name", "Jane Doe")
pdf_editor_set_form_field_value(editor, "email", "jane@example.com")
# Set checkboxes
pdf_editor_set_form_field_value(editor, "agree_to_terms", "true")
pdf_editor_save(editor, "filled_form.pdf")
Julia
editor = open_editor("form.pdf")
# Set text fields
set_form_field_value(editor, "full_name", "Jane Doe")
set_form_field_value(editor, "email", "jane@example.com")
# Set checkboxes
set_form_field_value(editor, "agree_to_terms", "true")
save(editor, "filled_form.pdf")
Zig
var editor = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor.deinit();
// Set text fields
try editor.setFormFieldValue("full_name", "Jane Doe");
try editor.setFormFieldValue("email", "jane@example.com");
// Set checkboxes
try editor.setFormFieldValue("agree_to_terms", "true");
try editor.save("filled_form.pdf");
Scala
import fyi.oxide.pdf.DocumentEditor
import scala.util.Using
Using.resource(DocumentEditor.open("form.pdf")) { editor =>
// Set text fields
editor.setFormField("full_name", "Jane Doe")
editor.setFormField("email", "jane@example.com")
// Set checkboxes
editor.setFormField("agree_to_terms", true)
editor.saveTo(java.nio.file.Path.of("filled_form.pdf"))
}
Clojure
(with-open [editor (pdf/editor "form.pdf")]
;; Set text fields
(.setFormField editor "full_name" "Jane Doe")
(.setFormField editor "email" "jane@example.com")
;; Set checkboxes
(.setFormField editor "agree_to_terms" true)
(.saveTo editor (java.nio.file.Path/of "filled_form.pdf" (make-array String 0))))
Objective-C
POXDocumentEditor *editor = [POXDocumentEditor openEditor:@"form.pdf" error:&err];
// Set text fields
[editor setFormField:@"full_name" value:@"Jane Doe" error:&err];
[editor setFormField:@"email" value:@"jane@example.com" error:&err];
// Set checkboxes
[editor setFormField:@"agree_to_terms" value:@"true" error:&err];
[editor saveToPath:@"filled_form.pdf" error:&err];
Elixir
{:ok, editor} = PdfOxide.open_editor("form.pdf")
# Set text fields
PdfOxide.set_form_field_value(editor, "full_name", "Jane Doe")
PdfOxide.set_form_field_value(editor, "email", "jane@example.com")
# Set checkboxes
PdfOxide.set_form_field_value(editor, "agree_to_terms", "true")
PdfOxide.editor_save(editor, "filled_form.pdf")
Exporting Form Data
Export form field data as FDF or XFDF for interchange with other applications.
FDF Export
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
doc.export_form_data("form_data.fdf")
WASM
const doc = new WasmPdfDocument(bytes);
const fdfBytes = doc.exportFormData("fdf");
// fdfBytes is a Uint8Array
Rust
use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;
let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
let fdf_bytes = FormExtractor::export_fdf(&mut doc, fields)?;
std::fs::write("form_data.fdf", &fdf_bytes)?;
C++
auto doc = pdf_oxide::Document::open("form.pdf");
auto fdf = doc.export_form_data_to_bytes(0); // 0 = FDF
std::ofstream("form_data.fdf", std::ios::binary)
.write(reinterpret_cast<const char*>(fdf.data()), fdf.size());
Swift
let doc = try Document.open("form.pdf")
let fdf = try doc.exportFormData(formatType: 0) // 0 = FDF
try Data(fdf).write(to: URL(fileURLWithPath: "form_data.fdf"))
Dart
final doc = PdfDocument.open('form.pdf');
final fdf = doc.exportFormDataToBytes(0); // 0 = FDF
File('form_data.fdf').writeAsBytesSync(fdf);
R
doc <- pdf_open("form.pdf")
fdf <- pdf_export_form_data_to_bytes(doc, format_type = 0L) # 0 = FDF
writeBin(fdf, "form_data.fdf")
Julia
doc = open_document("form.pdf")
fdf = export_form_data_to_bytes(doc, 0) # 0 = FDF
write("form_data.fdf", fdf)
Zig
var doc = try pdf_oxide.Document.open("form.pdf");
const fdf = try doc.exportFormDataToBytes(a, 0); // 0 = FDF
defer a.free(fdf);
try std.fs.cwd().writeFile(.{ .sub_path = "form_data.fdf", .data = fdf });
Objective-C
POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
NSData *fdf = [doc exportFormDataToBytes:0 error:&err]; // 0 = FDF
[fdf writeToFile:@"form_data.fdf" atomically:YES];
Elixir
{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fdf} = PdfOxide.export_form_data_to_bytes(doc, 0) # 0 = FDF
File.write!("form_data.fdf", fdf)
XFDF Export
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
doc.export_form_data("form_data.xfdf", format="xfdf")
WASM
const doc = new WasmPdfDocument(bytes);
const xfdfBytes = doc.exportFormData("xfdf");
Rust
use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;
let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
let xfdf = FormExtractor::export_xfdf(&mut doc, fields)?;
std::fs::write("form_data.xfdf", &xfdf)?;
C++
auto doc = pdf_oxide::Document::open("form.pdf");
auto xfdf = doc.export_form_data_to_bytes(1); // 1 = XFDF
std::ofstream("form_data.xfdf", std::ios::binary)
.write(reinterpret_cast<const char*>(xfdf.data()), xfdf.size());
Swift
let doc = try Document.open("form.pdf")
let xfdf = try doc.exportFormData(formatType: 1) // 1 = XFDF
try Data(xfdf).write(to: URL(fileURLWithPath: "form_data.xfdf"))
Dart
final doc = PdfDocument.open('form.pdf');
final xfdf = doc.exportFormDataToBytes(1); // 1 = XFDF
File('form_data.xfdf').writeAsBytesSync(xfdf);
R
doc <- pdf_open("form.pdf")
xfdf <- pdf_export_form_data_to_bytes(doc, format_type = 1L) # 1 = XFDF
writeBin(xfdf, "form_data.xfdf")
Julia
doc = open_document("form.pdf")
xfdf = export_form_data_to_bytes(doc, 1) # 1 = XFDF
write("form_data.xfdf", xfdf)
Zig
var doc = try pdf_oxide.Document.open("form.pdf");
const xfdf = try doc.exportFormDataToBytes(a, 1); // 1 = XFDF
defer a.free(xfdf);
try std.fs.cwd().writeFile(.{ .sub_path = "form_data.xfdf", .data = xfdf });
Objective-C
POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
NSData *xfdf = [doc exportFormDataToBytes:1 error:&err]; // 1 = XFDF
[xfdf writeToFile:@"form_data.xfdf" atomically:YES];
Elixir
{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, xfdf} = PdfOxide.export_form_data_to_bytes(doc, 1) # 1 = XFDF
File.write!("form_data.xfdf", xfdf)
Importing the data back
Export is the read side of the round-trip; the write side – loading FDF/XFDF
data back into a PDF – lives in the editing docs. The C ABI and Swift binding
declare `pdf_editor_import_fdf_bytes`, `pdf_editor_import_xfdf_bytes`,
`pdf_form_import_from_file`, and `pdf_document_import_form_data`, but in
v0.3.69 these all return the Unsupported status. The portable approach is to
parse the FDF/XFDF yourself and call `set_form_field_value` for each field. See
Form Field Editing -> Importing Form Data for complete,
runnable import examples.
Form Fields in Markdown/HTML
Form field values are included in Markdown and HTML conversion by default. Use include_form_fields to control this.
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
# Include form field values (default)
md = doc.to_markdown(0, include_form_fields=True)
# Exclude form fields
md = doc.to_markdown(0, include_form_fields=False)
WASM
const doc = new WasmPdfDocument(bytes);
// Include form fields (default: true)
const md = doc.toMarkdown(0, true, true, true);
// Exclude form fields (4th parameter)
const md2 = doc.toMarkdown(0, true, true, false);
Rust
use pdf_oxide::PdfDocument;
use pdf_oxide::converters::ConversionOptions;
let doc = PdfDocument::open("form.pdf")?;
let options = ConversionOptions {
include_form_fields: true,
..Default::default()
};
let md = doc.to_markdown(0, &options)?;
Flattening Forms
Flatten form fields into page content so they become non-editable. Useful for creating finalized PDFs.
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
# Flatten all form fields
doc.flatten_forms()
doc.save("flattened.pdf")
# Or flatten a single page
doc2 = PdfDocument("form.pdf")
doc2.flatten_forms_on_page(0)
doc2.save("flattened_page0.pdf")
WASM
const doc = new WasmPdfDocument(bytes);
// Flatten all form fields
doc.flattenForms();
const flattened = doc.save();
// Or flatten a single page
const doc2 = new WasmPdfDocument(bytes);
doc2.flattenFormsOnPage(0);
const flattened2 = doc2.save();
Rust
use pdf_oxide::Pdf;
let mut pdf = Pdf::open("form.pdf")?;
// Mark a specific page for flattening
pdf.flatten_page_annotations(0);
pdf.save("flattened.pdf")?;
// Or flatten all pages
let mut pdf2 = Pdf::open("form.pdf")?;
pdf2.flatten_all_annotations();
pdf2.save("flattened_all.pdf")?;
C++
auto editor = pdf_oxide::DocumentEditor::open("form.pdf");
// Flatten all form fields
editor.flatten_forms();
editor.save("flattened.pdf");
// Or flatten a single page
auto editor2 = pdf_oxide::DocumentEditor::open("form.pdf");
editor2.flatten_forms_on_page(0);
editor2.save("flattened_page0.pdf");
Swift
let editor = try DocumentEditor.openEditor("form.pdf")
// Flatten all form fields
try editor.flattenForms()
try editor.save("flattened.pdf")
// Or flatten a single page
let editor2 = try DocumentEditor.openEditor("form.pdf")
try editor2.flattenFormsOnPage(0)
try editor2.save("flattened_page0.pdf")
Dart
final editor = DocumentEditor.open('form.pdf');
// Flatten all form fields
editor.flattenForms();
editor.save('flattened.pdf');
// Or flatten a single page
final editor2 = DocumentEditor.open('form.pdf');
editor2.flattenFormsOnPage(0);
editor2.save('flattened_page0.pdf');
R
editor <- pdf_editor_open("form.pdf")
# Flatten all form fields
pdf_editor_flatten_forms(editor)
pdf_editor_save(editor, "flattened.pdf")
# Or flatten a single page
editor2 <- pdf_editor_open("form.pdf")
pdf_editor_flatten_forms_on_page(editor2, 0)
pdf_editor_save(editor2, "flattened_page0.pdf")
Julia
editor = open_editor("form.pdf")
# Flatten all form fields
flatten_forms(editor)
save(editor, "flattened.pdf")
# Or flatten a single page
editor2 = open_editor("form.pdf")
flatten_forms_on_page(editor2, 0)
save(editor2, "flattened_page0.pdf")
Zig
var editor = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor.deinit();
// Flatten all form fields
try editor.flattenForms();
try editor.save("flattened.pdf");
// Or flatten a single page
var editor2 = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor2.deinit();
try editor2.flattenFormsOnPage(0);
try editor2.save("flattened_page0.pdf");
Objective-C
POXDocumentEditor *editor = [POXDocumentEditor openEditor:@"form.pdf" error:&err];
// Flatten all form fields
[editor flattenForms:&err];
[editor saveToPath:@"flattened.pdf" error:&err];
// Or flatten a single page
POXDocumentEditor *editor2 = [POXDocumentEditor openEditor:@"form.pdf" error:&err];
[editor2 flattenFormsOnPage:0 error:&err];
[editor2 saveToPath:@"flattened_page0.pdf" error:&err];
Elixir
{:ok, editor} = PdfOxide.open_editor("form.pdf")
# Flatten all form fields
PdfOxide.flatten_forms(editor)
PdfOxide.editor_save(editor, "flattened.pdf")
# Or flatten a single page
{:ok, editor2} = PdfOxide.open_editor("form.pdf")
PdfOxide.flatten_forms_on_page(editor2, 0)
PdfOxide.editor_save(editor2, "flattened_page0.pdf")
XFA Forms
Analyze XFA (XML Forms Architecture) form content. XFA forms use XML-based templates rather than AcroForm fields and are common in government and enterprise forms.
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("xfa-form.pdf")
if doc.has_xfa():
print("This document contains an XFA form")
fields = doc.get_form_fields() # Extracts AcroForm fallback fields
for field in fields:
print(f" {field.name}: {field.value}")
Node.js
const doc = new PdfDocument("xfa-form.pdf");
if (doc.hasXFA()) {
console.log("This document contains an XFA form");
const fields = doc.getFormFields();
for (const field of fields) {
console.log(` ${field.name}: ${field.value}`);
}
}
doc.close();
Go
doc, _ := pdfoxide.Open("xfa-form.pdf")
defer doc.Close()
if doc.HasXfa() {
fmt.Println("This document contains an XFA form")
fields, _ := doc.FormFields()
for _, field := range fields {
fmt.Printf(" %s: %s\n", field.Name, field.Value)
}
}
C#
using var doc = PdfDocument.Open("xfa-form.pdf");
if (doc.HasXfa)
{
Console.WriteLine("This document contains an XFA form");
var fields = doc.GetFormFields();
foreach (var field in fields)
{
Console.WriteLine($" {field.Name}: {field.Value}");
}
}
WASM
const doc = new WasmPdfDocument(bytes);
if (doc.hasXfa()) {
console.log("This document contains an XFA form");
const fields = doc.getFormFields(); // AcroForm fallback fields
for (const field of fields) {
console.log(` ${field.name}: ${field.value}`);
}
}
Rust
use pdf_oxide::xfa::analyze_xfa_document;
use pdf_oxide::PdfDocument;
let mut doc = PdfDocument::open("xfa-form.pdf")?;
let analysis = analyze_xfa_document(&mut doc)?;
println!("XFA form detected: {} fields", analysis.fields.len());
for field in &analysis.fields {
println!(" {} ({:?})", field.name, field.field_type);
}
C++
auto doc = pdf_oxide::Document::open("xfa-form.pdf");
if (doc.has_xfa()) {
std::cout << "This document contains an XFA form\n";
for (const auto& field : doc.get_form_fields()) { // AcroForm fallback
std::cout << " " << field.name << ": " << field.value << "\n";
}
}
Swift
let doc = try Document.open("xfa-form.pdf")
if try doc.hasXfa() {
print("This document contains an XFA form")
for field in try doc.formFields() { // AcroForm fallback
print(" \(field.name): \(field.value)")
}
}
Dart
final doc = PdfDocument.open('xfa-form.pdf');
if (doc.hasXfa()) {
print('This document contains an XFA form');
for (final field in doc.getFormFields()) { // AcroForm fallback
print(' ${field.name}: ${field.value}');
}
}
R
doc <- pdf_open("xfa-form.pdf")
if (pdf_has_xfa(doc)) {
cat("This document contains an XFA form\n")
for (field in pdf_get_form_fields(doc)) { # AcroForm fallback
cat(sprintf(" %s: %s\n", field$name, field$value))
}
}
Julia
doc = open_document("xfa-form.pdf")
if has_xfa(doc)
println("This document contains an XFA form")
for field in get_form_fields(doc) # AcroForm fallback
println(" $(field.name): $(field.value)")
end
end
Zig
var doc = try pdf_oxide.Document.open("xfa-form.pdf");
if (doc.hasXfa()) {
std.debug.print("This document contains an XFA form\n", .{});
var fields = try doc.formFields(); // AcroForm fallback
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
const name = try fields.getName(a, i);
const value = try fields.getValue(a, i);
std.debug.print(" {s}: {s}\n", .{ name, value });
}
}
Objective-C
POXDocument *doc = [POXDocument openPath:@"xfa-form.pdf" error:&err];
if ([doc hasXfa]) {
NSLog(@"This document contains an XFA form");
for (POXFormField *field in [doc formFieldsWithError:&err]) { // AcroForm fallback
NSLog(@" %@: %@", field.name, field.value);
}
}
Elixir
{:ok, doc} = PdfOxide.open("xfa-form.pdf")
if PdfOxide.has_xfa?(doc) do
IO.puts("This document contains an XFA form")
{:ok, fields} = PdfOxide.form_fields(doc) # AcroForm fallback
Enum.each(fields, fn field -> IO.puts(" #{field.name}: #{field.value}") end)
end
API Reference
Python API
| Method | Description |
|---|---|
| `doc.get_form_fields()` | Get all form fields as FormField objects |
| `doc.get_form_field_value(name)` | Get value of a specific field by name |
| `doc.set_form_field_value(name, value)` | Set value of a form field |
| `doc.export_form_data(path, format="fdf")` | Export form data to FDF or XFDF file |
| `doc.has_xfa()` | Check if the document contains an XFA form |
| `doc.flatten_forms()` | Flatten all form fields into page content |
| `doc.flatten_forms_on_page(page)` | Flatten form fields on a specific page |
Python FormField Properties
| Property | Type | Description |
|---|---|---|
| `name` | `str` | Field name |
| `field_type` | `str` | Field type (text, checkbox, radio, choice, signature) |
| `value` | `str \| bool \| None` | Current field value |
| `is_required` | `bool` | Whether the field is required |
| `is_readonly` | `bool` | Whether the field is read-only |
| `max_length` | `int \| None` | Maximum length for text fields |
JavaScript API
| Method | Description |
|---|---|
| `doc.getFormFields()` | Get all form fields |
| `doc.getFormFieldValue(name)` | Get value of a specific field by name |
| `doc.setFormFieldValue(name, value)` | Set value of a form field |
| `doc.exportFormData(format?)` | Export as FDF (default) or XFDF, returns Uint8Array |
| `doc.hasXfa()` | Check if the document contains an XFA form |
| `doc.flattenForms()` | Flatten all form fields into page content |
| `doc.flattenFormsOnPage(pageIndex)` | Flatten form fields on a specific page |
JavaScript FormField Properties
| Property | Type | Description |
|---|---|---|
| `name` | `string` | Field name |
| `fieldType` | `string` | Field type |
| `value` | `string \| boolean \| null` | Current value |
| `flags` | `number` | Field flags |
Rust API
| Function | Description |
|---|---|
| `FormExtractor::extract_fields(doc)` | Extract all form fields from AcroForm dictionary |
| `FormExtractor::export_fdf(doc, fields)` | Export as FDF bytes |
| `FormExtractor::export_xfdf(doc, fields)` | Export as XFDF string |
| `analyze_xfa_document(doc)` | Analyze XFA form structure |
| `editor.get_form_fields()` | Get fields via DocumentEditor |
| `editor.get_form_field_value(name)` | Get field value by name |
| `editor.set_form_field_value(name, value)` | Set field value |
FormField Fields (Rust)
| Field | Type | Description |
|---|---|---|
| `name` | `String` | Field name from /T key |
| `full_name` | `String` | Fully qualified name (dot-separated) |
| `field_type` | `FieldType` | Button, Text, Choice, Signature, Unknown |
| `value` | `FieldValue` | Current field value |
| `tooltip` | `Option<String>` | Tooltip from /TU key |
| `bounds` | `Option<[f64; 4]>` | Bounding box [x1, y1, x2, y2] |
| `flags` | `Option<u32>` | Field flags (ReadOnly, Required, NoExport) |
| `default_value` | `Option<FieldValue>` | Default value from /DV key |
| `max_length` | `Option<u32>` | Max length for text fields |
FieldType Variants
| Variant | Description |
|---|---|
| `Button` | Checkbox, radio button, or push button (/Btn) |
| `Text` | Single or multi-line text field (/Tx) |
| `Choice` | List box or combo box (/Ch) |
| `Signature` | Digital signature field (/Sig) |
| `Unknown(String)` | Unrecognized field type |
FieldValue Variants
| Variant | Description |
|---|---|
| `Text(String)` | Text string value |
| `Boolean(bool)` | Boolean value (checkboxes) |
| `Name(String)` | Name value (radio buttons, choice fields) |
| `Array(Vec<String>)` | Multiple values (multi-select list boxes) |
| `None` | No value present |
Advanced: Check Required Fields
Python
from pdf_oxide import PdfDocument

doc = PdfDocument("form.pdf")

# A required field counts as missing when its value is falsy.
missing_names = [
    field.name
    for field in doc.get_form_fields()
    if field.is_required and not field.value
]

if missing_names:
    print("Missing required fields:")
    for field_name in missing_names:
        print(f" - {field_name}")
Rust
use pdf_oxide::extractors::{FieldValue, FormExtractor};
use pdf_oxide::PdfDocument;

// Field-flag bit 2 ("Required") of the PDF /Ff bitmask; bit 1 is ReadOnly and bit 3 is NoExport.
const REQUIRED_FLAG: u32 = 1 << 1;

let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;

// Collect every required field that has no value or only an empty text value.
let required_empty: Vec<_> = fields
    .iter()
    .filter(|f| {
        // Fields without a flags entry are treated as not required.
        let is_required = f.flags.map_or(false, |flags| flags & REQUIRED_FLAG != 0);
        // Use an explicit `match`: an or-pattern such as `None | Text(ref s) if ...`
        // is rejected by the compiler because `s` is not bound in every alternative.
        let is_empty = match &f.value {
            FieldValue::None => true,
            FieldValue::Text(s) => s.is_empty(),
            _ => false,
        };
        is_required && is_empty
    })
    .collect();

if !required_empty.is_empty() {
    println!("Missing required fields:");
    for f in &required_empty {
        println!(" - {}", f.full_name);
    }
}
C++
// Report every required form field whose value is empty.
auto doc = pdf_oxide::Document::open("form.pdf");
const auto& form_fields = doc.get_form_fields();
for (const auto& entry : form_fields) {
    const bool missing = entry.required && entry.value.empty();
    if (!missing) {
        continue;
    }
    std::cout << "Missing required field: " << entry.name << "\n";
}
Swift
// Report every required form field whose value is empty.
let doc = try Document.open("form.pdf")
let missingFields = try doc.formFields().filter { $0.required && $0.value.isEmpty }
for entry in missingFields {
    print("Missing required field: \(entry.name)")
}
Dart
// Report every required form field whose value is empty.
final doc = PdfDocument.open('form.pdf');
final missingFields =
    doc.getFormFields().where((f) => f.required && f.value.isEmpty);
for (final entry in missingFields) {
  print('Missing required field: ${entry.name}');
}
R
# Report every required form field whose value is the empty string.
doc <- pdf_open("form.pdf")
for (entry in pdf_get_form_fields(doc)) {
  # Skip optional fields before looking at the value.
  if (!entry$required) next
  if (entry$value == "") {
    cat(sprintf("Missing required field: %s\n", entry$name))
  }
}
Julia
# Report every required form field whose value is empty.
doc = open_document("form.pdf")
for entry in get_form_fields(doc)
    # Skip anything that is optional or already filled in.
    (entry.required && isempty(entry.value)) || continue
    println("Missing required field: $(entry.name)")
end
Zig
// Report every required form field whose value is empty.
var doc = try pdf_oxide.Document.open("form.pdf");
var fields = try doc.formFields();
// Release the field collection when the enclosing scope exits.
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
// NOTE(review): `a` is not defined in this snippet — presumably an allocator declared earlier; confirm.
const value = try fields.getValue(a, i);
if (try fields.isRequired(i) and value.len == 0) {
// The name is fetched only for fields that are actually reported.
const name = try fields.getName(a, i);
std.debug.print("Missing required field: {s}\n", .{name});
}
}
Objective-C
// Log every required form field whose value has zero length.
// NOTE(review): `err` is not declared in this snippet — presumably an NSError * declared earlier; confirm.
POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
// Messaging a nil value returns 0 for length, so a nil value also counts as empty.
if (field.required && field.value.length == 0) {
NSLog(@"Missing required field: %@", field.name);
}
}
Elixir
# Report every required form field whose value is nil or the empty string.
{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, form_fields} = PdfOxide.form_fields(doc)

for entry <- form_fields, entry.required and entry.value in [nil, ""] do
  IO.puts("Missing required field: #{entry.name}")
end
Related Pages
- Fill PDF Forms – Step-by-step form filling guide
- Annotation Extraction – Access annotations alongside form fields
- Text Extraction – Extract text content from pages
- Metadata & XMP – Read document-level properties