폼 데이터 추출
PDF Oxide는 PDF 문서에서 텍스트 필드, 체크박스, 라디오 버튼, 선택 필드, 서명 등 대화형 폼 필드(AcroForms)를 추출합니다. 추출된 폼 데이터는 다른 애플리케이션과의 교환을 위해 FDF 또는 XFDF 형식으로 내보낼 수 있습니다. XFA 폼(XML Forms Architecture)의 분석 및 변환도 지원합니다.
빠른 예시
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
fields = doc.get_form_fields()
for field in fields:
print(f"{field.name} ({field.field_type}): {field.value}")
Node.js
const { PdfDocument } = require("pdf-oxide");
const doc = new PdfDocument("form.pdf");
const fields = doc.getFormFields();
for (const field of fields) {
console.log(`${field.name} (${field.fieldType}): ${field.value}`);
}
doc.close();
Go
import pdfoxide "github.com/yfedoseev/pdf_oxide/go"
doc, _ := pdfoxide.Open("form.pdf")
defer doc.Close()
fields, _ := doc.FormFields()
for _, field := range fields {
fmt.Printf("%s (%s): %s\n", field.Name, field.FieldType, field.Value)
}
C#
using PdfOxide.Core;
using var doc = PdfDocument.Open("form.pdf");
var fields = doc.GetFormFields();
foreach (var field in fields)
{
Console.WriteLine($"{field.Name} ({field.FieldType}): {field.Value}");
}
WASM
const doc = new WasmPdfDocument(bytes);
const fields = doc.getFormFields();
for (const field of fields) {
console.log(`${field.name} (${field.fieldType}): ${field.value}`);
}
Rust
use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;
let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
for field in &fields {
println!("{} ({:?}): {:?}", field.full_name, field.field_type, field.value);
}
Java
import fyi.oxide.pdf.PdfDocument;
import fyi.oxide.pdf.form.FormField;
try (PdfDocument doc = PdfDocument.open("form.pdf")) {
for (FormField field : doc.formFields()) {
System.out.printf("%s (%s): %s%n",
field.name(), field.type(), field.value().orElse(""));
}
}
Ruby
require 'pdf_oxide'
PdfOxide::PdfDocument.open('form.pdf') do |doc|
doc.form_fields.each do |field|
puts "#{field[:name]} (#{field[:type]}): #{field[:value]}"
end
end
C++
#include <pdf_oxide/pdf_oxide.hpp>
auto doc = pdf_oxide::Document::open("form.pdf");
for (const auto& field : doc.get_form_fields()) {
std::cout << field.name << " (" << field.type << "): " << field.value << "\n";
}
Swift
import PdfOxide
let doc = try Document.open("form.pdf")
for field in try doc.formFields() {
print("\(field.name) (\(field.type)): \(field.value)")
}
Kotlin
import fyi.oxide.pdf.PdfDocument
PdfDocument.open("form.pdf").use { doc ->
doc.formFields().forEach { field ->
println("${field.name()} (${field.type()}): ${field.valueOrNull() ?: ""}")
}
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
final doc = PdfDocument.open('form.pdf');
for (final field in doc.getFormFields()) {
print('${field.name} (${field.type}): ${field.value}');
}
R
library(pdfoxide)
doc <- pdf_open("form.pdf")
for (field in pdf_get_form_fields(doc)) {
cat(sprintf("%s (%s): %s\n", field$name, field$type, field$value))
}
Julia
using PdfOxide
doc = open_document("form.pdf")
for field in get_form_fields(doc)
println("$(field.name) ($(field.type)): $(field.value)")
end
Zig
const pdf_oxide = @import("pdf_oxide");
const a = std.heap.page_allocator;
var doc = try pdf_oxide.Document.open("form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
const name = try fields.getName(a, i);
const ftype = try fields.getType(a, i);
const value = try fields.getValue(a, i);
std.debug.print("{s} ({s}): {s}\n", .{ name, ftype, value });
}
Scala
import fyi.oxide.pdf.{PdfDocument, valueOption}
import scala.util.Using
Using.resource(PdfDocument.open("form.pdf")) { doc =>
doc.formFieldsSeq.foreach { field =>
println(s"${field.name} (${field.`type`}): ${field.valueOption.getOrElse("")}")
}
}
Clojure
(require '[pdf-oxide.core :as pdf])
(with-open [doc (pdf/open "form.pdf")]
(doseq [field (pdf/form-fields doc)]
(println (format "%s (%s): %s"
(.name field) (.type field) (.orElse (.value field) "")))))
Objective-C
#import "POXPdfOxide.h"
NSError *err = nil;
POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
NSLog(@"%@ (%@): %@", field.name, field.type, field.value);
}
Elixir
{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
Enum.each(fields, fn field ->
IO.puts("#{field.name} (#{field.type}): #{field.value}")
end)
PyMuPDF get_form_fields()에서 마이그레이션
PyMuPDF에서 전환하는 경우 API 구조는 유사하지만, PDF Oxide는 더 풍부한 데이터를 반환하고 XFA 폼도 지원합니다.
PyMuPDF:
import fitz
doc = fitz.open("form.pdf")
# Returns dict of {field_name: field_value} — loses type info
fields = doc.get_form_fields()
# Or iterate widgets for more detail
for page in doc:
for widget in page.widgets():
print(widget.field_name, widget.field_value)
PDF Oxide:
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
# Returns structured objects with name, value, type, options, rect
fields = doc.get_form_fields()
for field in fields:
print(f"{field.name} ({field.field_type}): {field.value}")
# Also handles XFA forms that PyMuPDF cannot read
xfa = doc.has_xfa()
주요 차이점:
- PDF Oxide는 구조화된 필드 객체를 반환합니다(딕셔너리가 아님)
- 필드 타입, 바운딩 사각형, 선택 필드의 옵션을 포함합니다
- XFA 폼 지원 — PyMuPDF의 get_form_fields()는 XFA 전용 PDF에서 빈 결과를 반환합니다
- 폼 데이터 교환을 위한 FDF/XFDF 형식으로 내보내기 지원
PyMuPDF, pypdf, pdfplumber, pdfminer를 포함한 전체 마이그레이션 가이드는 「PDF Oxide로 마이그레이션」 문서를 참조하세요.
폼 필드 읽기
모든 필드 가져오기
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("tax-form.pdf")
fields = doc.get_form_fields()
for field in fields:
print(f"Name: {field.name}")
print(f" Type: {field.field_type}")
print(f" Value: {field.value}")
print(f" Required: {field.is_required}")
print(f" Read-only: {field.is_readonly}")
if field.max_length:
print(f" Max length: {field.max_length}")
Node.js
const doc = new PdfDocument("tax-form.pdf");
const fields = doc.getFormFields();
for (const field of fields) {
console.log(`Name: ${field.name}`);
console.log(` Type: ${field.fieldType}`);
console.log(` Value: ${field.value}`);
}
doc.close();
Go
doc, _ := pdfoxide.Open("tax-form.pdf")
defer doc.Close()
fields, _ := doc.FormFields()
for _, field := range fields {
fmt.Printf("Name: %s\n", field.Name)
fmt.Printf(" Type: %s\n", field.FieldType)
fmt.Printf(" Value: %s\n", field.Value)
}
C#
using var doc = PdfDocument.Open("tax-form.pdf");
var fields = doc.GetFormFields();
foreach (var field in fields)
{
Console.WriteLine($"Name: {field.Name}");
Console.WriteLine($" Type: {field.FieldType}");
Console.WriteLine($" Value: {field.Value}");
}
WASM
const doc = new WasmPdfDocument(bytes);
const fields = doc.getFormFields();
for (const field of fields) {
console.log(`Name: ${field.name}`);
console.log(` Type: ${field.fieldType}`);
console.log(` Value: ${field.value}`);
console.log(` Flags: ${field.flags}`);
}
Rust
use pdf_oxide::extractors::{FormExtractor, FieldType};
use pdf_oxide::PdfDocument;
let mut doc = PdfDocument::open("tax-form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
for field in &fields {
let type_str = match &field.field_type {
FieldType::Button => "Button",
FieldType::Text => "Text",
FieldType::Choice => "Choice",
FieldType::Signature => "Signature",
FieldType::Unknown(s) => s.as_str(),
};
println!("[{}] {} = {:?}", type_str, field.full_name, field.value);
if let Some(tooltip) = &field.tooltip {
println!(" Tooltip: {}", tooltip);
}
if let Some(bounds) = &field.bounds {
println!(" Bounds: [{:.1}, {:.1}, {:.1}, {:.1}]",
bounds[0], bounds[1], bounds[2], bounds[3]);
}
}
Java
import fyi.oxide.pdf.PdfDocument;
import fyi.oxide.pdf.form.FormField;
try (PdfDocument doc = PdfDocument.open("tax-form.pdf")) {
for (FormField field : doc.formFields()) {
System.out.println("Name: " + field.name());
System.out.println(" Type: " + field.type());
System.out.println(" Value: " + field.value().orElse(""));
}
}
Ruby
PdfOxide::PdfDocument.open('tax-form.pdf') do |doc|
doc.form_fields.each do |field|
puts "Name: #{field[:name]}"
puts " Type: #{field[:type]}"
puts " Value: #{field[:value]}"
end
end
C++
auto doc = pdf_oxide::Document::open("tax-form.pdf");
for (const auto& field : doc.get_form_fields()) {
std::cout << "Name: " << field.name << "\n";
std::cout << " Type: " << field.type << "\n";
std::cout << " Value: " << field.value << "\n";
std::cout << " Read-only: " << field.readonly << "\n";
std::cout << " Required: " << field.required << "\n";
}
Swift
let doc = try Document.open("tax-form.pdf")
for field in try doc.formFields() {
print("Name: \(field.name)")
print(" Type: \(field.type)")
print(" Value: \(field.value)")
print(" Read-only: \(field.readonly)")
print(" Required: \(field.required)")
}
Kotlin
PdfDocument.open("tax-form.pdf").use { doc ->
doc.formFields().forEach { field ->
println("Name: ${field.name()}")
println(" Type: ${field.type()}")
println(" Value: ${field.valueOrNull() ?: ""}")
}
}
Dart
final doc = PdfDocument.open('tax-form.pdf');
for (final field in doc.getFormFields()) {
print('Name: ${field.name}');
print(' Type: ${field.type}');
print(' Value: ${field.value}');
print(' Read-only: ${field.readonly}');
print(' Required: ${field.required}');
}
R
doc <- pdf_open("tax-form.pdf")
for (field in pdf_get_form_fields(doc)) {
cat(sprintf("Name: %s\n", field$name))
cat(sprintf(" Type: %s\n", field$type))
cat(sprintf(" Value: %s\n", field$value))
cat(sprintf(" Read-only: %s\n", field$readonly))
cat(sprintf(" Required: %s\n", field$required))
}
Julia
doc = open_document("tax-form.pdf")
for field in get_form_fields(doc)
println("Name: $(field.name)")
println(" Type: $(field.type)")
println(" Value: $(field.value)")
println(" Read-only: $(field.readonly)")
println(" Required: $(field.required)")
end
Zig
var doc = try pdf_oxide.Document.open("tax-form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
const name = try fields.getName(a, i);
const ftype = try fields.getType(a, i);
const value = try fields.getValue(a, i);
std.debug.print("Name: {s}\n Type: {s}\n Value: {s}\n", .{ name, ftype, value });
std.debug.print(" Read-only: {}\n Required: {}\n", .{
try fields.isReadonly(i), try fields.isRequired(i),
});
}
Scala
Using.resource(PdfDocument.open("tax-form.pdf")) { doc =>
doc.formFieldsSeq.foreach { field =>
println(s"Name: ${field.name}")
println(s" Type: ${field.`type`}")
println(s" Value: ${field.valueOption.getOrElse("")}")
}
}
Clojure
(with-open [doc (pdf/open "tax-form.pdf")]
(doseq [field (pdf/form-fields doc)]
(println "Name:" (.name field))
(println " Type:" (.type field))
(println " Value:" (.orElse (.value field) ""))))
Objective-C
POXDocument *doc = [POXDocument openPath:@"tax-form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
NSLog(@"Name: %@", field.name);
NSLog(@" Type: %@", field.type);
NSLog(@" Value: %@", field.value);
NSLog(@" Read-only: %d", field.readonly);
NSLog(@" Required: %d", field.required);
}
Elixir
{:ok, doc} = PdfOxide.open("tax-form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
Enum.each(fields, fn field ->
IO.puts("Name: #{field.name}")
IO.puts(" Type: #{field.type}")
IO.puts(" Value: #{field.value}")
IO.puts(" Read-only: #{field.read_only}")
IO.puts(" Required: #{field.required}")
end)
특정 필드 값 가져오기
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
name = doc.get_form_field_value("employee_name")
ssn = doc.get_form_field_value("ssn")
agreed = doc.get_form_field_value("agree_to_terms")
print(f"Name: {name}") # "John Doe"
print(f"SSN: {ssn}") # "123-45-6789"
print(f"Agreed: {agreed}") # True
WASM
const doc = new WasmPdfDocument(bytes);
const name = doc.getFormFieldValue("employee_name");
const ssn = doc.getFormFieldValue("ssn");
const agreed = doc.getFormFieldValue("agree_to_terms");
console.log(`Name: ${name}`); // "John Doe"
console.log(`SSN: ${ssn}`); // "123-45-6789"
console.log(`Agreed: ${agreed}`); // true
Rust
use pdf_oxide::editor::{DocumentEditor, EditableDocument};
let mut editor = DocumentEditor::open("form.pdf")?;
if let Some(value) = editor.get_form_field_value("employee_name")? {
println!("Name: {:?}", value);
}
폼 작성
필드 값 설정
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
# Set text fields
doc.set_form_field_value("full_name", "Jane Doe")
doc.set_form_field_value("email", "jane@example.com")
# Set checkboxes
doc.set_form_field_value("agree_to_terms", True)
# Save the filled form
doc.save("filled_form.pdf")
WASM
const doc = new WasmPdfDocument(bytes);
// Set text fields
doc.setFormFieldValue("full_name", "Jane Doe");
doc.setFormFieldValue("email", "jane@example.com");
// Set checkboxes
doc.setFormFieldValue("agree_to_terms", true);
// Save the filled form
const filledBytes = doc.save();
Rust
use pdf_oxide::editor::{DocumentEditor, EditableDocument, FormFieldValue};
let mut editor = DocumentEditor::open("form.pdf")?;
// Set text fields
editor.set_form_field_value("full_name", FormFieldValue::Text("Jane Doe".into()))?;
editor.set_form_field_value("email", FormFieldValue::Text("jane@example.com".into()))?;
// Set checkboxes
editor.set_form_field_value("agree_to_terms", FormFieldValue::Boolean(true))?;
// Set choice fields
editor.set_form_field_value("state", FormFieldValue::Choice("California".into()))?;
editor.save("filled_form.pdf")?;
Java
import fyi.oxide.pdf.DocumentEditor;
try (DocumentEditor editor = DocumentEditor.open("form.pdf")) {
// Set text fields
editor.setFormField("full_name", "Jane Doe");
editor.setFormField("email", "jane@example.com");
// Set checkboxes
editor.setFormField("agree_to_terms", true);
editor.saveTo(java.nio.file.Path.of("filled_form.pdf"));
}
Ruby
PdfOxide::DocumentEditor.open('form.pdf') do |editor|
# Set text fields
editor.set_form_field('full_name', 'Jane Doe')
editor.set_form_field('email', 'jane@example.com')
# Set checkboxes
editor.set_form_field('agree_to_terms', true)
editor.save_to('filled_form.pdf')
end
C++
auto editor = pdf_oxide::DocumentEditor::open("form.pdf");
// Set text fields
editor.set_form_field_value("full_name", "Jane Doe");
editor.set_form_field_value("email", "jane@example.com");
// Set checkboxes
editor.set_form_field_value("agree_to_terms", "true");
editor.save("filled_form.pdf");
Swift
let editor = try DocumentEditor.openEditor("form.pdf")
// Set text fields
try editor.setFormFieldValue("full_name", "Jane Doe")
try editor.setFormFieldValue("email", "jane@example.com")
// Set checkboxes
try editor.setFormFieldValue("agree_to_terms", "true")
try editor.save("filled_form.pdf")
Kotlin
import fyi.oxide.pdf.DocumentEditor
DocumentEditor.open("form.pdf").use { editor ->
// Set text fields
editor.setFormField("full_name", "Jane Doe")
editor.setFormField("email", "jane@example.com")
// Set checkboxes
editor.setFormField("agree_to_terms", true)
editor.saveTo(java.nio.file.Path.of("filled_form.pdf"))
}
Dart
final editor = DocumentEditor.open('form.pdf');
// Set text fields
editor.setFormFieldValue('full_name', 'Jane Doe');
editor.setFormFieldValue('email', 'jane@example.com');
// Set checkboxes
editor.setFormFieldValue('agree_to_terms', 'true');
editor.save('filled_form.pdf');
R
editor <- pdf_editor_open("form.pdf")
# Set text fields
pdf_editor_set_form_field_value(editor, "full_name", "Jane Doe")
pdf_editor_set_form_field_value(editor, "email", "jane@example.com")
# Set checkboxes
pdf_editor_set_form_field_value(editor, "agree_to_terms", "true")
pdf_editor_save(editor, "filled_form.pdf")
Julia
editor = open_editor("form.pdf")
# Set text fields
set_form_field_value(editor, "full_name", "Jane Doe")
set_form_field_value(editor, "email", "jane@example.com")
# Set checkboxes
set_form_field_value(editor, "agree_to_terms", "true")
save(editor, "filled_form.pdf")
Zig
var editor = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor.deinit();
// Set text fields
try editor.setFormFieldValue("full_name", "Jane Doe");
try editor.setFormFieldValue("email", "jane@example.com");
// Set checkboxes
try editor.setFormFieldValue("agree_to_terms", "true");
try editor.save("filled_form.pdf");
Scala
import fyi.oxide.pdf.DocumentEditor
import scala.util.Using
Using.resource(DocumentEditor.open("form.pdf")) { editor =>
// Set text fields
editor.setFormField("full_name", "Jane Doe")
editor.setFormField("email", "jane@example.com")
// Set checkboxes
editor.setFormField("agree_to_terms", true)
editor.saveTo(java.nio.file.Path.of("filled_form.pdf"))
}
Clojure
(with-open [editor (pdf/editor "form.pdf")]
;; Set text fields
(.setFormField editor "full_name" "Jane Doe")
(.setFormField editor "email" "jane@example.com")
;; Set checkboxes
(.setFormField editor "agree_to_terms" true)
(.saveTo editor (java.nio.file.Path/of "filled_form.pdf" (make-array String 0))))
Objective-C
POXDocumentEditor *editor = [POXDocumentEditor openEditor:@"form.pdf" error:&err];
// Set text fields
[editor setFormField:@"full_name" value:@"Jane Doe" error:&err];
[editor setFormField:@"email" value:@"jane@example.com" error:&err];
// Set checkboxes
[editor setFormField:@"agree_to_terms" value:@"true" error:&err];
[editor saveToPath:@"filled_form.pdf" error:&err];
Elixir
{:ok, editor} = PdfOxide.open_editor("form.pdf")
# Set text fields
PdfOxide.set_form_field_value(editor, "full_name", "Jane Doe")
PdfOxide.set_form_field_value(editor, "email", "jane@example.com")
# Set checkboxes
PdfOxide.set_form_field_value(editor, "agree_to_terms", "true")
PdfOxide.editor_save(editor, "filled_form.pdf")
폼 데이터 내보내기
폼 필드 데이터를 FDF 또는 XFDF 형식으로 내보내 다른 애플리케이션과 공유할 수 있습니다.
FDF 내보내기
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
doc.export_form_data("form_data.fdf")
WASM
const doc = new WasmPdfDocument(bytes);
const fdfBytes = doc.exportFormData("fdf");
// fdfBytes is a Uint8Array
Rust
use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;
let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
let fdf_bytes = FormExtractor::export_fdf(&mut doc, fields)?;
std::fs::write("form_data.fdf", &fdf_bytes)?;
C++
auto doc = pdf_oxide::Document::open("form.pdf");
auto fdf = doc.export_form_data_to_bytes(0); // 0 = FDF
std::ofstream("form_data.fdf", std::ios::binary)
.write(reinterpret_cast<const char*>(fdf.data()), fdf.size());
Swift
let doc = try Document.open("form.pdf")
let fdf = try doc.exportFormData(formatType: 0) // 0 = FDF
try Data(fdf).write(to: URL(fileURLWithPath: "form_data.fdf"))
Dart
final doc = PdfDocument.open('form.pdf');
final fdf = doc.exportFormDataToBytes(0); // 0 = FDF
File('form_data.fdf').writeAsBytesSync(fdf);
R
doc <- pdf_open("form.pdf")
fdf <- pdf_export_form_data_to_bytes(doc, format_type = 0L) # 0 = FDF
writeBin(fdf, "form_data.fdf")
Julia
doc = open_document("form.pdf")
fdf = export_form_data_to_bytes(doc, 0) # 0 = FDF
write("form_data.fdf", fdf)
Zig
var doc = try pdf_oxide.Document.open("form.pdf");
const fdf = try doc.exportFormDataToBytes(a, 0); // 0 = FDF
defer a.free(fdf);
try std.fs.cwd().writeFile(.{ .sub_path = "form_data.fdf", .data = fdf });
Objective-C
POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
NSData *fdf = [doc exportFormDataToBytes:0 error:&err]; // 0 = FDF
[fdf writeToFile:@"form_data.fdf" atomically:YES];
Elixir
{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fdf} = PdfOxide.export_form_data_to_bytes(doc, 0) # 0 = FDF
File.write!("form_data.fdf", fdf)
XFDF 내보내기
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
doc.export_form_data("form_data.xfdf", format="xfdf")
WASM
const doc = new WasmPdfDocument(bytes);
const xfdfBytes = doc.exportFormData("xfdf");
Rust
use pdf_oxide::extractors::FormExtractor;
use pdf_oxide::PdfDocument;
let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
let xfdf = FormExtractor::export_xfdf(&mut doc, fields)?;
std::fs::write("form_data.xfdf", &xfdf)?;
C++
auto doc = pdf_oxide::Document::open("form.pdf");
auto xfdf = doc.export_form_data_to_bytes(1); // 1 = XFDF
std::ofstream("form_data.xfdf", std::ios::binary)
.write(reinterpret_cast<const char*>(xfdf.data()), xfdf.size());
Swift
let doc = try Document.open("form.pdf")
let xfdf = try doc.exportFormData(formatType: 1) // 1 = XFDF
try Data(xfdf).write(to: URL(fileURLWithPath: "form_data.xfdf"))
Dart
final doc = PdfDocument.open('form.pdf');
final xfdf = doc.exportFormDataToBytes(1); // 1 = XFDF
File('form_data.xfdf').writeAsBytesSync(xfdf);
R
doc <- pdf_open("form.pdf")
xfdf <- pdf_export_form_data_to_bytes(doc, format_type = 1L) # 1 = XFDF
writeBin(xfdf, "form_data.xfdf")
Julia
doc = open_document("form.pdf")
xfdf = export_form_data_to_bytes(doc, 1) # 1 = XFDF
write("form_data.xfdf", xfdf)
Zig
var doc = try pdf_oxide.Document.open("form.pdf");
const xfdf = try doc.exportFormDataToBytes(a, 1); // 1 = XFDF
defer a.free(xfdf);
try std.fs.cwd().writeFile(.{ .sub_path = "form_data.xfdf", .data = xfdf });
Objective-C
POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
NSData *xfdf = [doc exportFormDataToBytes:1 error:&err]; // 1 = XFDF
[xfdf writeToFile:@"form_data.xfdf" atomically:YES];
Elixir
{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, xfdf} = PdfOxide.export_form_data_to_bytes(doc, 1) # 1 = XFDF
File.write!("form_data.xfdf", xfdf)
데이터 가져오기에 대하여
내보내기는 왕복 흐름의 읽기 측면입니다. FDF/XFDF 데이터를 PDF에 다시 쓰는 쓰기 측면은 편집 문서에서 다룹니다. C ABI와 Swift 바인딩에서 pdf_editor_import_fdf_bytes, pdf_editor_import_xfdf_bytes, pdf_form_import_from_file, pdf_document_import_form_data가 선언되어 있지만, v0.3.69에서는 Unsupported 상태를 반환합니다. 현재 권장하는 이식 가능한 방법은 FDF/XFDF를 직접 파싱하고 set_form_field_value를 호출하는 것입니다. 실행 가능한 가져오기 예시는 폼 필드 편집 → 폼 데이터 가져오기를 참조하세요.
Markdown 및 HTML에서의 폼 필드
폼 필드 값은 기본적으로 Markdown 및 HTML 변환 시 포함됩니다. include_form_fields 파라미터로 이를 제어할 수 있습니다.
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
# Include form field values (default)
md = doc.to_markdown(0, include_form_fields=True)
# Exclude form fields
md = doc.to_markdown(0, include_form_fields=False)
WASM
const doc = new WasmPdfDocument(bytes);
// Include form fields (default: true)
const md = doc.toMarkdown(0, true, true, true);
// Exclude form fields (4th parameter)
const md2 = doc.toMarkdown(0, true, true, false);
Rust
use pdf_oxide::PdfDocument;
use pdf_oxide::converters::ConversionOptions;
let doc = PdfDocument::open("form.pdf")?;
let options = ConversionOptions {
include_form_fields: true,
..Default::default()
};
let md = doc.to_markdown(0, &options)?;
폼 평탄화
폼 필드를 페이지 콘텐츠로 병합하여 편집 불가능하게 만듭니다. 최종 확정 PDF를 생성할 때 유용합니다.
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
# Flatten all form fields
doc.flatten_forms()
doc.save("flattened.pdf")
# Or flatten a single page
doc2 = PdfDocument("form.pdf")
doc2.flatten_forms_on_page(0)
doc2.save("flattened_page0.pdf")
WASM
const doc = new WasmPdfDocument(bytes);
// Flatten all form fields
doc.flattenForms();
const flattened = doc.save();
// Or flatten a single page
const doc2 = new WasmPdfDocument(bytes);
doc2.flattenFormsOnPage(0);
const flattened2 = doc2.save();
Rust
use pdf_oxide::Pdf;
let mut pdf = Pdf::open("form.pdf")?;
// Mark a specific page for flattening
pdf.flatten_page_annotations(0);
pdf.save("flattened.pdf")?;
// Or flatten all pages
let mut pdf2 = Pdf::open("form.pdf")?;
pdf2.flatten_all_annotations();
pdf2.save("flattened_all.pdf")?;
C++
auto editor = pdf_oxide::DocumentEditor::open("form.pdf");
// Flatten all form fields
editor.flatten_forms();
editor.save("flattened.pdf");
// Or flatten a single page
auto editor2 = pdf_oxide::DocumentEditor::open("form.pdf");
editor2.flatten_forms_on_page(0);
editor2.save("flattened_page0.pdf");
Swift
let editor = try DocumentEditor.openEditor("form.pdf")
// Flatten all form fields
try editor.flattenForms()
try editor.save("flattened.pdf")
// Or flatten a single page
let editor2 = try DocumentEditor.openEditor("form.pdf")
try editor2.flattenFormsOnPage(0)
try editor2.save("flattened_page0.pdf")
Dart
final editor = DocumentEditor.open('form.pdf');
// Flatten all form fields
editor.flattenForms();
editor.save('flattened.pdf');
// Or flatten a single page
final editor2 = DocumentEditor.open('form.pdf');
editor2.flattenFormsOnPage(0);
editor2.save('flattened_page0.pdf');
R
editor <- pdf_editor_open("form.pdf")
# Flatten all form fields
pdf_editor_flatten_forms(editor)
pdf_editor_save(editor, "flattened.pdf")
# Or flatten a single page
editor2 <- pdf_editor_open("form.pdf")
pdf_editor_flatten_forms_on_page(editor2, 0)
pdf_editor_save(editor2, "flattened_page0.pdf")
Julia
editor = open_editor("form.pdf")
# Flatten all form fields
flatten_forms(editor)
save(editor, "flattened.pdf")
# Or flatten a single page
editor2 = open_editor("form.pdf")
flatten_forms_on_page(editor2, 0)
save(editor2, "flattened_page0.pdf")
Zig
var editor = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor.deinit();
// Flatten all form fields
try editor.flattenForms();
try editor.save("flattened.pdf");
// Or flatten a single page
var editor2 = try pdf_oxide.DocumentEditor.openEditor("form.pdf");
defer editor2.deinit();
try editor2.flattenFormsOnPage(0);
try editor2.save("flattened_page0.pdf");
Objective-C
POXDocumentEditor *editor = [POXDocumentEditor openEditor:@"form.pdf" error:&err];
// Flatten all form fields
[editor flattenForms:&err];
[editor saveToPath:@"flattened.pdf" error:&err];
// Or flatten a single page
POXDocumentEditor *editor2 = [POXDocumentEditor openEditor:@"form.pdf" error:&err];
[editor2 flattenFormsOnPage:0 error:&err];
[editor2 saveToPath:@"flattened_page0.pdf" error:&err];
Elixir
{:ok, editor} = PdfOxide.open_editor("form.pdf")
# Flatten all form fields
PdfOxide.flatten_forms(editor)
PdfOxide.editor_save(editor, "flattened.pdf")
# Or flatten a single page
{:ok, editor2} = PdfOxide.open_editor("form.pdf")
PdfOxide.flatten_forms_on_page(editor2, 0)
PdfOxide.editor_save(editor2, "flattened_page0.pdf")
XFA 폼
XFA(XML Forms Architecture) 폼 콘텐츠를 분석합니다. XFA 폼은 AcroForm 필드 대신 XML 기반 템플릿을 사용하며, 정부 기관이나 기업용 폼에서 흔히 볼 수 있습니다.
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("xfa-form.pdf")
if doc.has_xfa():
print("This document contains an XFA form")
fields = doc.get_form_fields() # Extracts AcroForm fallback fields
for field in fields:
print(f" {field.name}: {field.value}")
Node.js
const doc = new PdfDocument("xfa-form.pdf");
if (doc.hasXFA()) {
console.log("This document contains an XFA form");
const fields = doc.getFormFields();
for (const field of fields) {
console.log(` ${field.name}: ${field.value}`);
}
}
doc.close();
Go
doc, _ := pdfoxide.Open("xfa-form.pdf")
defer doc.Close()
if doc.HasXfa() {
fmt.Println("This document contains an XFA form")
fields, _ := doc.FormFields()
for _, field := range fields {
fmt.Printf(" %s: %s\n", field.Name, field.Value)
}
}
C#
using var doc = PdfDocument.Open("xfa-form.pdf");
if (doc.HasXfa)
{
Console.WriteLine("This document contains an XFA form");
var fields = doc.GetFormFields();
foreach (var field in fields)
{
Console.WriteLine($" {field.Name}: {field.Value}");
}
}
WASM
const doc = new WasmPdfDocument(bytes);
if (doc.hasXfa()) {
console.log("This document contains an XFA form");
const fields = doc.getFormFields(); // AcroForm fallback fields
for (const field of fields) {
console.log(` ${field.name}: ${field.value}`);
}
}
Rust
use pdf_oxide::xfa::analyze_xfa_document;
use pdf_oxide::PdfDocument;
let mut doc = PdfDocument::open("xfa-form.pdf")?;
let analysis = analyze_xfa_document(&mut doc)?;
println!("XFA form detected: {} fields", analysis.fields.len());
for field in &analysis.fields {
println!(" {} ({:?})", field.name, field.field_type);
}
C++
auto doc = pdf_oxide::Document::open("xfa-form.pdf");
if (doc.has_xfa()) {
std::cout << "This document contains an XFA form\n";
for (const auto& field : doc.get_form_fields()) { // AcroForm fallback
std::cout << " " << field.name << ": " << field.value << "\n";
}
}
Swift
let doc = try Document.open("xfa-form.pdf")
if try doc.hasXfa() {
print("This document contains an XFA form")
for field in try doc.formFields() { // AcroForm fallback
print(" \(field.name): \(field.value)")
}
}
Dart
final doc = PdfDocument.open('xfa-form.pdf');
if (doc.hasXfa()) {
print('This document contains an XFA form');
for (final field in doc.getFormFields()) { // AcroForm fallback
print(' ${field.name}: ${field.value}');
}
}
R
doc <- pdf_open("xfa-form.pdf")
if (pdf_has_xfa(doc)) {
cat("This document contains an XFA form\n")
for (field in pdf_get_form_fields(doc)) { # AcroForm fallback
cat(sprintf(" %s: %s\n", field$name, field$value))
}
}
Julia
doc = open_document("xfa-form.pdf")
if has_xfa(doc)
println("This document contains an XFA form")
for field in get_form_fields(doc) # AcroForm fallback
println(" $(field.name): $(field.value)")
end
end
Zig
var doc = try pdf_oxide.Document.open("xfa-form.pdf");
if (doc.hasXfa()) {
std.debug.print("This document contains an XFA form\n", .{});
var fields = try doc.formFields(); // AcroForm fallback
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
const name = try fields.getName(a, i);
const value = try fields.getValue(a, i);
std.debug.print(" {s}: {s}\n", .{ name, value });
}
}
Objective-C
POXDocument *doc = [POXDocument openPath:@"xfa-form.pdf" error:&err];
if ([doc hasXfa]) {
NSLog(@"This document contains an XFA form");
for (POXFormField *field in [doc formFieldsWithError:&err]) { // AcroForm fallback
NSLog(@" %@: %@", field.name, field.value);
}
}
Elixir
{:ok, doc} = PdfOxide.open("xfa-form.pdf")
if PdfOxide.has_xfa?(doc) do
IO.puts("This document contains an XFA form")
{:ok, fields} = PdfOxide.form_fields(doc) # AcroForm fallback
Enum.each(fields, fn field -> IO.puts(" #{field.name}: #{field.value}") end)
end
API 레퍼런스
Python API
| 메서드 | 설명 |
|---|---|
| `doc.get_form_fields()` | 모든 폼 필드를 FormField 객체로 가져오기 |
| `doc.get_form_field_value(name)` | 이름으로 특정 필드 값 가져오기 |
| `doc.set_form_field_value(name, value)` | 폼 필드 값 설정 |
| `doc.export_form_data(path, format="fdf")` | 폼 데이터를 FDF 또는 XFDF 파일로 내보내기 |
| `doc.has_xfa()` | 문서에 XFA 폼이 포함되어 있는지 확인 |
| `doc.flatten_forms()` | 모든 폼 필드를 페이지 콘텐츠로 평탄화 |
| `doc.flatten_forms_on_page(page)` | 특정 페이지의 폼 필드 평탄화 |
Python FormField 속성
| 속성 | 타입 | 설명 |
|---|---|---|
| `name` | `str` | 필드 이름 |
| `field_type` | `str` | 필드 타입 (text, checkbox, radio, choice, signature) |
| `value` | `str \| bool \| None` | 현재 필드 값 |
| `is_required` | `bool` | 필수 필드 여부 |
| `is_readonly` | `bool` | 읽기 전용 여부 |
| `max_length` | `int \| None` | 텍스트 필드의 최대 길이 |
JavaScript API
| 메서드 | 설명 |
|---|---|
| `doc.getFormFields()` | 모든 폼 필드 가져오기 |
| `doc.getFormFieldValue(name)` | 이름으로 특정 필드 값 가져오기 |
| `doc.setFormFieldValue(name, value)` | 폼 필드 값 설정 |
| `doc.exportFormData(format?)` | FDF(기본값) 또는 XFDF로 내보내기, Uint8Array 반환 |
| `doc.hasXfa()` | 문서에 XFA 폼이 포함되어 있는지 확인 |
| `doc.flattenForms()` | 모든 폼 필드를 페이지 콘텐츠로 평탄화 |
| `doc.flattenFormsOnPage(pageIndex)` | 특정 페이지의 폼 필드 평탄화 |
JavaScript FormField 속성
| 속성 | 타입 | 설명 |
|---|---|---|
| `name` | `string` | 필드 이름 |
| `fieldType` | `string` | 필드 타입 |
| `value` | `string \| boolean \| null` | 현재 값 |
| `flags` | `number` | 필드 플래그 |
Rust API
| 함수 | 설명 |
|---|---|
| `FormExtractor::extract_fields(doc)` | AcroForm 딕셔너리에서 모든 폼 필드 추출 |
| `FormExtractor::export_fdf(doc, fields)` | FDF 바이트로 내보내기 |
| `FormExtractor::export_xfdf(doc, fields)` | XFDF 문자열로 내보내기 |
| `analyze_xfa_document(doc)` | XFA 폼 구조 분석 |
| `editor.get_form_fields()` | DocumentEditor를 통해 필드 가져오기 |
| `editor.get_form_field_value(name)` | 이름으로 필드 값 가져오기 |
| `editor.set_form_field_value(name, value)` | 필드 값 설정 |
FormField 필드 (Rust)
| 필드 | 타입 | 설명 |
|---|---|---|
| `name` | `String` | /T 키에서 가져온 필드 이름 |
| `full_name` | `String` | 완전한 정규화 이름 (점 구분자) |
| `field_type` | `FieldType` | Button, Text, Choice, Signature, Unknown |
| `value` | `FieldValue` | 현재 필드 값 |
| `tooltip` | `Option<String>` | /TU 키에서 가져온 툴팁 |
| `bounds` | `Option<[f64; 4]>` | 경계 상자 [x1, y1, x2, y2] |
| `flags` | `Option<u32>` | 필드 플래그 (ReadOnly, Required, NoExport) |
| `default_value` | `Option<FieldValue>` | /DV 키에서 가져온 기본값 |
| `max_length` | `Option<u32>` | 텍스트 필드의 최대 길이 |
FieldType 변형
| 변형 | 설명 |
|---|---|
| `Button` | 체크박스, 라디오 버튼, 또는 푸시 버튼 (/Btn) |
| `Text` | 단일 또는 여러 줄 텍스트 필드 (/Tx) |
| `Choice` | 목록 상자 또는 콤보 상자 (/Ch) |
| `Signature` | 디지털 서명 필드 (/Sig) |
| `Unknown(String)` | 인식할 수 없는 필드 타입 |
FieldValue 변형
| 변형 | 설명 |
|---|---|
| `Text(String)` | 텍스트 문자열 값 |
| `Boolean(bool)` | 불리언 값 (체크박스) |
| `Name(String)` | 이름 값 (라디오 버튼, 선택 필드) |
| `Array(Vec<String>)` | 다중 값 (다중 선택 목록 상자) |
| `None` | 값 없음 |
고급: 필수 필드 확인
Python
from pdf_oxide import PdfDocument
doc = PdfDocument("form.pdf")
fields = doc.get_form_fields()
missing = [f for f in fields if f.is_required and not f.value]
if missing:
print("Missing required fields:")
for f in missing:
print(f" - {f.name}")
Rust
use pdf_oxide::extractors::{FormExtractor, FieldValue};
use pdf_oxide::PdfDocument;
let mut doc = PdfDocument::open("form.pdf")?;
let fields = FormExtractor::extract_fields(&mut doc)?;
// Collect every field that is flagged Required but currently has no value
// (either no value at all, or an empty text string).
let required_empty: Vec<_> = fields
    .iter()
    .filter(|f| {
        // 0x02 is the Required bit of the field flags; a field without
        // flags is treated as not required.
        let is_required = f.flags.map_or(false, |flags| flags & 0x02 != 0);
        // An or-pattern must bind the same names in every alternative, so
        // `None | Text(ref s) if ...` does not compile; spell the two
        // "empty" cases out as separate match arms instead.
        let is_empty = match &f.value {
            FieldValue::None => true,
            FieldValue::Text(s) => s.is_empty(),
            _ => false,
        };
        is_required && is_empty
    })
    .collect();
if !required_empty.is_empty() {
println!("Missing required fields:");
for f in &required_empty {
println!(" - {}", f.full_name);
}
}
C++
auto doc = pdf_oxide::Document::open("form.pdf");
for (const auto& field : doc.get_form_fields()) {
if (field.required && field.value.empty()) {
std::cout << "Missing required field: " << field.name << "\n";
}
}
Swift
let doc = try Document.open("form.pdf")
for field in try doc.formFields() where field.required && field.value.isEmpty {
print("Missing required field: \(field.name)")
}
Dart
final doc = PdfDocument.open('form.pdf');
for (final field in doc.getFormFields()) {
if (field.required && field.value.isEmpty) {
print('Missing required field: ${field.name}');
}
}
R
doc <- pdf_open("form.pdf")
for (field in pdf_get_form_fields(doc)) {
if (field$required && field$value == "") {
cat(sprintf("Missing required field: %s\n", field$name))
}
}
Julia
doc = open_document("form.pdf")
for field in get_form_fields(doc)
if field.required && isempty(field.value)
println("Missing required field: $(field.name)")
end
end
Zig
var doc = try pdf_oxide.Document.open("form.pdf");
var fields = try doc.formFields();
defer fields.deinit();
const count = try fields.count();
var i: i32 = 0;
while (i < count) : (i += 1) {
const value = try fields.getValue(a, i);
if (try fields.isRequired(i) and value.len == 0) {
const name = try fields.getName(a, i);
std.debug.print("Missing required field: {s}\n", .{name});
}
}
Objective-C
POXDocument *doc = [POXDocument openPath:@"form.pdf" error:&err];
for (POXFormField *field in [doc formFieldsWithError:&err]) {
if (field.required && field.value.length == 0) {
NSLog(@"Missing required field: %@", field.name);
}
}
Elixir
{:ok, doc} = PdfOxide.open("form.pdf")
{:ok, fields} = PdfOxide.form_fields(doc)
fields
|> Enum.filter(fn f -> f.required and f.value in [nil, ""] end)
|> Enum.each(fn f -> IO.puts("Missing required field: #{f.name}") end)
관련 페이지
- PDF 폼 작성 – 단계별 폼 작성 가이드
- 주석 추출 – 폼 필드와 함께 주석 접근
- 텍스트 추출 – 페이지에서 텍스트 콘텐츠 추출
- 메타데이터 및 XMP – 문서 수준 속성 읽기