Skip to content

Go API Reference

The github.com/yfedoseev/pdf_oxide/go module wraps the Rust core via CGo. Since v0.3.27 it links the Rust library as a staticlib, producing fully self-contained Go binaries — no runtime library configuration required.

go get github.com/yfedoseev/pdf_oxide/go
import pdfoxide "github.com/yfedoseev/pdf_oxide/go"

For other languages see Python, Node.js, C#, or Rust. For full generated docs see pkg.go.dev/github.com/yfedoseev/pdf_oxide/go.


Package functions

// Opening
func Open(path string) (*PdfDocument, error)
func OpenFromBytes(data []byte) (*PdfDocument, error)
func OpenWithPassword(path, password string) (*PdfDocument, error)
func OpenReader(r io.Reader) (*PdfDocument, error)
func OpenEditor(path string) (*DocumentEditor, error)

// Creation
func FromMarkdown(md string) (*PdfCreator, error)
func FromHtml(html string) (*PdfCreator, error)
func FromText(text string) (*PdfCreator, error)
func FromImage(path string) (*PdfCreator, error)
func FromImageBytes(data []byte) (*PdfCreator, error)
func Merge(paths []string) ([]byte, error)

// Barcodes
func GenerateQRCode(data string, format, size int) (*Image, error)
func GenerateBarcode(data string, format, size int) (*Image, error)

// OCR (build tag: ocr)
func NewOcrEngine() (*OcrEngine, error)

// Logging
func SetLogLevel(level int)
func GetLogLevel() int

PdfDocument

Read-only access. Reads are protected by an internal sync.RWMutex — safe for concurrent goroutine use.

Lifecycle

func (d *PdfDocument) Close() error
func (d *PdfDocument) IsClosed() bool

Document info

func (d *PdfDocument) PageCount() (int, error)
func (d *PdfDocument) Version() (int, int, error)
func (d *PdfDocument) HasStructureTree() (bool, error)
func (d *PdfDocument) PageInfo(page int) (PageInfo, error)
func (d *PdfDocument) Authenticate(password string) (bool, error)

Pages (v0.3.34)

func (d *PdfDocument) Page(index int) (*Page, error)
func (d *PdfDocument) Pages() ([]*Page, error)

Page is a lightweight handle that dispatches all extraction methods to the parent document:

type Page struct {
    Index int
}

func (p *Page) Text() (string, error)
func (p *Page) Markdown() (string, error)
func (p *Page) Html() (string, error)
func (p *Page) PlainText() (string, error)
func (p *Page) Chars() ([]Char, error)
func (p *Page) Words() ([]Word, error)
func (p *Page) Lines() ([]TextLine, error)
func (p *Page) Tables() ([]Table, error)
func (p *Page) Images() ([]Image, error)
func (p *Page) Paths() ([]Path, error)
func (p *Page) Fonts() ([]Font, error)
func (p *Page) Annotations() ([]Annotation, error)
func (p *Page) Info() (*PageInfo, error)
func (p *Page) Search(term string, caseSensitive bool) ([]SearchResult, error)
func (p *Page) NeedsOcr() (bool, error)
func (p *Page) TextWithOcr(engine *OcrEngine) (string, error)

Text extraction

func (d *PdfDocument) ExtractText(page int) (string, error)
func (d *PdfDocument) ExtractAllText() (string, error)
func (d *PdfDocument) ToMarkdown(page int) (string, error)
func (d *PdfDocument) ToMarkdownAll() (string, error)
func (d *PdfDocument) ToHtml(page int) (string, error)
func (d *PdfDocument) ToHtmlAll() (string, error)
func (d *PdfDocument) ToPlainText(page int) (string, error)
func (d *PdfDocument) ToPlainTextAll() (string, error)

Structured

func (d *PdfDocument) ExtractWords(page int) ([]Word, error)
func (d *PdfDocument) ExtractTextLines(page int) ([]TextLine, error)
func (d *PdfDocument) ExtractChars(page int) ([]Char, error)
func (d *PdfDocument) ExtractSpans(page int) ([]Span, error)
func (d *PdfDocument) ExtractTables(page int) ([]Table, error)
func (d *PdfDocument) ExtractPaths(page int) ([]Path, error)

Region-based

func (d *PdfDocument) ExtractTextInRect(page int, x, y, w, h float32) (string, error)
func (d *PdfDocument) ExtractWordsInRect(page int, x, y, w, h float32) ([]Word, error)
func (d *PdfDocument) ExtractImagesInRect(page int, x, y, w, h float32) ([]Image, error)

Resources

func (d *PdfDocument) Fonts(page int) ([]Font, error)
func (d *PdfDocument) Images(page int) ([]Image, error)
func (d *PdfDocument) Annotations(page int) ([]Annotation, error)
func (d *PdfDocument) PageElements(page int) ([]Element, error)
func (d *PdfDocument) FormFields() ([]FormField, error)
func (d *PdfDocument) SearchPage(page int, query string, caseSensitive bool) ([]SearchResult, error)
func (d *PdfDocument) SearchAll(query string, caseSensitive bool) ([]SearchResult, error)

Rendering

func (d *PdfDocument) RenderPage(page int, format int) (*Image, error)
func (d *PdfDocument) RenderPageZoom(page int, zoom float32, format int) (*Image, error)
func (d *PdfDocument) RenderThumbnail(page int, width int, format int) (*Image, error)

Values for the format parameter: 0 = PNG, 1 = JPEG.

Validation

func (d *PdfDocument) ValidatePdfA(level int) (*ValidationResult, error)
func (d *PdfDocument) ValidatePdfUa() (bool, []string, error)
func (d *PdfDocument) ValidatePdfX(level int) (bool, []string, error)

DocumentEditor

Writes are serialized internally. Use each editor from a single goroutine; do not pipeline independent edits to the same editor across goroutines.

func (e *DocumentEditor) Close() error

// Metadata
func (e *DocumentEditor) Title() (string, error)
func (e *DocumentEditor) Author() (string, error)
func (e *DocumentEditor) SetTitle(title string) error
func (e *DocumentEditor) SetAuthor(author string) error
func (e *DocumentEditor) SetSubject(s string) error
func (e *DocumentEditor) ApplyMetadata(meta Metadata) error

// Pages
func (e *DocumentEditor) SetPageRotation(page, degrees int) error
func (e *DocumentEditor) MovePage(from, to int) error
func (e *DocumentEditor) DeletePage(page int) error
func (e *DocumentEditor) CropMargins(l, b, r, t float32) error
func (e *DocumentEditor) EraseRegion(page int, x, y, w, h float32) error

// Annotations & forms
func (e *DocumentEditor) FlattenAnnotations(page int) error
func (e *DocumentEditor) FlattenAllAnnotations() error
func (e *DocumentEditor) FlattenForms() error
func (e *DocumentEditor) SetFormFieldValue(name, value string) error

// Merging
func (e *DocumentEditor) MergeFrom(path string) (int, error)

// Save
func (e *DocumentEditor) Save(path string) error
func (e *DocumentEditor) SaveEncrypted(path, user, owner string) error

PdfCreator

func (c *PdfCreator) Save(path string) error
func (c *PdfCreator) ToBytes() ([]byte, error)
func (c *PdfCreator) Close() error

Not safe for concurrent goroutine use.


OcrEngine (build tag ocr)

func (o *OcrEngine) Close() error
func (o *OcrEngine) NeedsOcr(d *PdfDocument, page int) bool
func (o *OcrEngine) ExtractTextWithOcr(d *PdfDocument, page int) (string, error)

Build with the ocr tag enabled: go build -tags ocr ./...


Image (render output)

func (i *Image) Close() error
func (i *Image) Data() []byte
func (i *Image) PNGData() []byte
func (i *Image) SaveToFile(path string) error

Errors

Sentinels

var (
    ErrInvalidPath        error
    ErrDocumentNotFound   error
    ErrInvalidFormat      error
    ErrExtractionFailed   error
    ErrParseError         error
    ErrInvalidPageIndex   error
    ErrSearchFailed       error
    ErrInternal           error
    ErrDocumentClosed     error
    ErrEditorClosed       error
    ErrCreatorClosed      error
    ErrIndexOutOfBounds   error
    ErrEmptyContent       error
)

Check for these sentinels with errors.Is. To read the numeric code and message, use errors.As with a *pdfoxide.Error target.

Error type

type Error struct {
    Code    int
    Message string
}

func (e *Error) Error() string

Data types

type SearchResult struct {
    Text            string
    Page            int
    X, Y            float32
    Width, Height   float32
}

type Word struct {
    Text string
    X, Y, Width, Height float32
}

type TextLine struct {
    Text string
    Y    float32
    Spans []Span
}

type Char struct {
    Char     string
    X, Y     float32
    FontSize float32
    FontName string
    BBox     Rect
}

type Span struct {
    Text     string
    FontName string
    FontSize float32
    BBox     Rect
}

type Image struct {
    Width, Height    int
    Format           string
    Colorspace       string
    BitsPerComponent int
    Data             []byte
}

type Font struct {
    Name       string
    Type       string
    Encoding   string
    IsEmbedded bool
    IsSubset   bool
    Size       float32
}

type Annotation struct {
    Type, Subtype      string
    Content            string
    X, Y               float32
    Width, Height      float32
    Author             string
    BorderWidth        float32
    Color              uint32
    CreationDate       int64
    ModificationDate   int64
    LinkURI            string
    TextIconName       string
    IsHidden           bool
    IsPrintable        bool
    IsReadOnly         bool
    IsMarkedDeleted    bool
}

type Element struct {
    Type   string
    Text   string
    X, Y   float32
    Width  float32
    Height float32
}

type FormField struct {
    Name      string
    FieldType string
    Value     string
    PageIndex int
}

type Rect struct {
    X, Y, Width, Height float32
}

type PageInfo struct {
    Width    float32
    Height   float32
    Rotation int
    MediaBox Rect
    CropBox  Rect
    ArtBox   Rect
    BleedBox Rect
    TrimBox  Rect
}

type Metadata struct {
    Title        string
    Author       string
    Subject      string
    Producer     string
    CreationDate string
}

Empty fields on Metadata are treated as “do not change” by ApplyMetadata.


Thread safety summary

  • *PdfDocument reads — safe for concurrent goroutines (sync.RWMutex).
  • *DocumentEditor — serialize edits on one goroutine.
  • *PdfCreator — not intended to be shared across goroutines.

See the concurrency guide for patterns.


v0.3.38 additions

The following APIs are CGo-only (//go:build cgo). When building with CGO_ENABLED=0 (the purego backend), referencing them is a compile-time error. See Go Getting Started → Installation for backend selection.

DocumentBuilder / FluentPageBuilder / EmbeddedFont

font, _ := pdfoxide.EmbeddedFontFromFile("DejaVuSans.ttf")
// Alt: pdfoxide.EmbeddedFontFromBytes(data []byte, name string) (*EmbeddedFont, error)
defer font.Close()

builder := pdfoxide.NewDocumentBuilder()
builder.RegisterEmbeddedFont("DejaVu", font)
builder.LetterPage().
    At(72, 720).Font("DejaVu", 12).Text("Hello").
    Heading(1, "Title").
    Paragraph("Body text").
    // Annotations (15 methods — link_url/page/named, highlight, underline,
    // strikeout, squiggly, sticky_note, stamp, freetext, watermark variants)
    LinkUrl("https://example.com").
    Highlight(1.0, 1.0, 0.0).
    StickyNote("Review this").
    Stamp(pdfoxide.StampApproved).
    WatermarkDraft().
    // AcroForm widgets
    TextField("name", 150, 400, 200, 20, "Jane Doe").
    Checkbox("agree", 72, 380, 15, 15, true).
    ComboBox("country", 150, 360, 200, 20, []string{"US", "UK"}, "US").
    RadioGroup("tier", []pdfoxide.RadioButton{
        {Value: "free", X: 72, Y: 340, W: 15, H: 15},
        {Value: "pro",  X: 120, Y: 340, W: 15, H: 15},
    }, "pro").
    PushButton("submit", 72, 300, 80, 25, "Submit").
    // Graphics primitives
    Rect(50, 270, 500, 2).
    FilledRect(50, 260, 500, 2, 0.9, 0.9, 0.9).
    Line(50, 250, 550, 250).
    Done()

_ = builder.SaveEncrypted("out.pdf", "user-pw", "owner-pw")
// Alt: builder.Save("out.pdf")
// Alt: bytes, _ := builder.Build()
// Alt: enc, _ := builder.ToBytesEncrypted("user-pw", "owner-pw")

HTML + CSS pipeline

pdf, err := pdfoxide.FromHtmlCss(html, css, fontBytes)
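// Alternatively, pass several fonts, each tagged with its family name (reuses pdf and err from above):
pdf, err = pdfoxide.FromHtmlCssWithFonts(html, css, []pdfoxide.NamedFont{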
    {Family: "DejaVu Sans",     Data: font1},
    {Family: "Noto Sans CJK",   Data: font2},
})

Signature verification

sigs, _ := doc.Signatures()
for _, sig := range sigs {
    fmt.Println(sig.SignerName, sig.Reason, sig.Location, sig.SigningTime)
    status, _ := sig.Verify()                  // Valid / Invalid / Unknown
    ok, _ := sig.VerifyDetached(pdfBytes)
}

ts, _ := pdfoxide.ParseTimestamp(tstBytes)
fmt.Println(ts.Time, ts.Serial, ts.PolicyOid, ts.TsaName, ts.HashAlgorithm)

client := pdfoxide.NewTsaClient("https://freetsa.org/tsr").
    WithCredentials("user", "pass").
    WithTimeout(30 * time.Second).
    WithHashAlgorithm(pdfoxide.HashSha256).
    WithNonce(true).
    WithCertReq(true)
fresh, _ := client.RequestTimestamp(pdfBytes)

Rendering

region, _ := doc.RenderPageRegion(0, 72, 200, 468, 300, 0)   // page, x, y, w, h, format (0=PNG, 1=JPEG)
fitted, _ := doc.RenderPageFit(0, 1024, 768, 0)              // page, fitWidth, fitHeight, format (0=PNG, 1=JPEG)