Concurrencia — lecturas de PDF seguras entre hilos
PdfDocument es Send + Sync en el lado de Rust desde la v0.3.22. Un único documento puede compartirse entre hilos del SO, goroutines, worker threads o tareas asyncio para la extracción paralela de páginas. Las operaciones de escritura aún requieren serialización — para eso existe DocumentEditor.
Qué cambió en la v0.3.22
Los 16 wrappers RefCell<T> del interior de PdfDocument se reemplazaron por Mutex<T>, y Cell<usize> se convirtió en AtomicUsize. Los bindings de lenguaje eliminaron el marcador unsendable en las clases Python (PdfDocument, PdfPage, FormField), que antes lanzaban RuntimeError en el momento en que cruzaban un límite de hilo.
Resultado neto: los pools de hilos, los runtimes asíncronos y el Python free-threaded funcionan ahora sin configuración adicional.
Rust
Rust
use pdf_oxide::PdfDocument;
use std::sync::Arc;
use std::thread;
let doc = Arc::new(PdfDocument::open("report.pdf")?);
let page_count = doc.page_count();
let handles: Vec<_> = (0..page_count)
.map(|i| {
let doc = Arc::clone(&doc);
thread::spawn(move || doc.extract_text(i))
})
.collect();
for h in handles {
let text = h.join().unwrap()?;
println!("{}", text);
}
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
int pageCount = doc.pageCount();
ExecutorService pool = Executors.newFixedThreadPool(8);
List<Future<String>> futures = IntStream.range(0, pageCount)
.mapToObj(i -> pool.submit(() -> doc.extractText(i)))
.collect(Collectors.toList());
for (Future<String> f : futures) System.out.println(f.get());
pool.shutdown();
}
Kotlin
import fyi.oxide.pdf.PdfDocument
import kotlinx.coroutines.*
PdfDocument.open(java.nio.file.Path.of("report.pdf")).use { doc ->
val pages = runBlocking(Dispatchers.IO) {
(0 until doc.pageCount())
.map { i -> async { doc.extractText(i) } }
.awaitAll()
}
}
Scala
import fyi.oxide.pdf.PdfDocument
import scala.concurrent.*
import scala.concurrent.duration.*
import scala.util.Using
import java.util.concurrent.Executors
import ExecutionContext.Implicits.global
Using.resource(PdfDocument.open("report.pdf")) { doc =>
val pages = (0 until doc.pageCount()).map(i => Future(doc.extractText(i)))
Await.result(Future.sequence(pages), 60.seconds)
}
Clojure
(require '[pdf-oxide.core :as pdf])
(with-open [doc (pdf/open "report.pdf")]
(->> (range (pdf/page-count doc))
(map (fn [i] (future (pdf/extract-text doc i))))
(doall)
(map deref)))
Ruby
require 'pdf_oxide'
PdfOxide::PdfDocument.open('report.pdf') do |doc|
pages = (0...doc.page_count).map do |i|
Thread.new { doc.extract_text(i) }
end.map(&:value)
end
PHP
use PdfOxide\PdfDocument;
// PHP no tiene hilos con memoria compartida de forma nativa; procesa secuencialmente
// (las lecturas están protegidas por un bloqueo interno, así que también es seguro con pthreads/parallel).
$doc = PdfDocument::open('report.pdf');
$pages = [];
for ($i = 0; $i < $doc->pageCount(); $i++) {
$pages[$i] = $doc->extractText($i);
}
$doc->close();
C++
#include <pdf_oxide/pdf_oxide.hpp>
#include <future>
#include <vector>
auto doc = pdf_oxide::Document::open("report.pdf");
int page_count = doc.page_count();
std::vector<std::future<std::string>> futures;
for (int i = 0; i < page_count; ++i)
futures.push_back(std::async(std::launch::async,
[&doc, i] { return doc.extract_text(i); }));
for (auto& f : futures) std::cout << f.get();
Swift
import PdfOxide
let doc = try Document.open("report.pdf")
let pageCount = try doc.pageCount()
try await withThrowingTaskGroup(of: String.self) { group in
for i in 0..<pageCount {
group.addTask { try doc.extractText(i) }
}
for try await text in group { print(text) }
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
final doc = PdfDocument.open('report.pdf');
final pages = [
for (var i = 0; i < doc.pageCount; i++) doc.extractText(i),
];
doc.close();
R
library(pdfoxide)
library(parallel)
doc <- pdf_open("report.pdf")
n <- pdf_page_count(doc)
pages <- mclapply(0:(n - 1), function(i) pdf_extract_text(doc, i))
Julia
using PdfOxide
doc = open_document("report.pdf")
n = page_count(doc)
pages = Vector{String}(undef, n)
Threads.@threads for i in 0:(n - 1)
pages[i + 1] = extract_text(doc, i)
end
Zig
const pdf_oxide = @import("pdf_oxide");
const a = std.heap.page_allocator;
var doc = try pdf_oxide.Document.open("report.pdf");
const n = try doc.pageCount();
var i: usize = 0;
while (i < n) : (i += 1) {
const text = try doc.extractText(a, i); // reads are internally locked
defer a.free(text);
}
Objective-C
#import "POXPdfOxide.h"
NSError *err = nil;
POXDocument *doc = [POXDocument openPath:@"report.pdf" error:&err];
NSInteger n = [doc pageCountError:&err];
dispatch_apply(n, dispatch_get_global_queue(0, 0), ^(size_t i) {
NSError *e = nil;
NSString *text = [doc extractText:i error:&e];
});
Elixir
{:ok, doc} = PdfOxide.open("report.pdf")
{:ok, n} = PdfOxide.page_count(doc)
pages =
0..(n - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
Con tokio:
Rust
use std::sync::Arc;
use tokio::task;
let doc = Arc::new(pdf_oxide::PdfDocument::open("report.pdf")?);
let tasks: Vec<_> = (0..doc.page_count())
.map(|i| {
let doc = Arc::clone(&doc);
task::spawn_blocking(move || doc.extract_text(i))
})
.collect();
for t in tasks {
let text = t.await??;
}
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
ExecutorService pool = Executors.newWorkStealingPool();
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
CompletableFuture<?>[] tasks = IntStream.range(0, doc.pageCount())
.mapToObj(i -> CompletableFuture.supplyAsync(() -> doc.extractText(i), pool))
.toArray(CompletableFuture[]::new);
CompletableFuture.allOf(tasks).join();
}
Kotlin
import fyi.oxide.pdf.PdfDocument
import kotlinx.coroutines.*
suspend fun extractAll(doc: PdfDocument): List<String> = coroutineScope {
(0 until doc.pageCount())
.map { i -> async(Dispatchers.IO) { doc.extractText(i) } }
.awaitAll()
}
Scala
import fyi.oxide.pdf.PdfDocument
import scala.concurrent.*
import ExecutionContext.Implicits.global
def extractAll(doc: PdfDocument): Future[Seq[String]] =
Future.traverse(0 until doc.pageCount())(i => Future(doc.extractText(i)))
Swift
import PdfOxide
func extractAll(_ doc: Document) async throws -> [String] {
let n = try doc.pageCount()
return try await withThrowingTaskGroup(of: (Int, String).self) { group in
for i in 0..<n { group.addTask { (i, try doc.extractText(i)) } }
var out = [String](repeating: "", count: n)
for try await (i, text) in group { out[i] = text }
return out
}
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
Future<List<String>> extractAll(PdfDocument doc) async {
return Future.wait([
for (var i = 0; i < doc.pageCount; i++) Future(() => doc.extractText(i)),
]);
}
Elixir
def extract_all(doc) do
{:ok, n} = PdfOxide.page_count(doc)
0..(n - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end, ordered: true)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
end
Python
Python
from concurrent.futures import ThreadPoolExecutor
from pdf_oxide import PdfDocument
doc = PdfDocument("report.pdf")
with ThreadPoolExecutor(max_workers=8) as pool:
pages = list(pool.map(doc.extract_text, range(doc.page_count())))
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
ExecutorService pool = Executors.newFixedThreadPool(8);
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
List<String> pages = IntStream.range(0, doc.pageCount())
.mapToObj(i -> pool.submit(() -> doc.extractText(i)))
.collect(Collectors.toList())
.stream().map(f -> { try { return f.get(); } catch (Exception e) { throw new RuntimeException(e); } })
.collect(Collectors.toList());
}
pool.shutdown();
Ruby
require 'pdf_oxide'
PdfOxide::PdfDocument.open('report.pdf') do |doc|
pages = (0...doc.page_count)
.map { |i| Thread.new { doc.extract_text(i) } }
.map(&:value)
end
C++
#include <pdf_oxide/pdf_oxide.hpp>
#include <future>
#include <vector>
auto doc = pdf_oxide::Document::open("report.pdf");
std::vector<std::future<std::string>> futures;
for (int i = 0; i < doc.page_count(); ++i)
futures.push_back(std::async(std::launch::async,
[&doc, i] { return doc.extract_text(i); }));
std::vector<std::string> pages;
for (auto& f : futures) pages.push_back(f.get());
Swift
import PdfOxide
let doc = try Document.open("report.pdf")
let pages = try await withThrowingTaskGroup(of: (Int, String).self) { group -> [String] in
let n = try doc.pageCount()
for i in 0..<n { group.addTask { (i, try doc.extractText(i)) } }
var out = [String](repeating: "", count: n)
for try await (i, t) in group { out[i] = t }
return out
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
final doc = PdfDocument.open('report.pdf');
final pages = await Future.wait([
for (var i = 0; i < doc.pageCount; i++) Future(() => doc.extractText(i)),
]);
doc.close();
R
library(pdfoxide)
library(parallel)
doc <- pdf_open("report.pdf")
n <- pdf_page_count(doc)
pages <- mclapply(0:(n - 1), function(i) pdf_extract_text(doc, i), mc.cores = 8)
Julia
using PdfOxide
doc = open_document("report.pdf")
n = page_count(doc)
pages = Vector{String}(undef, n)
Threads.@threads for i in 0:(n - 1)
pages[i + 1] = extract_text(doc, i)
end
Elixir
{:ok, doc} = PdfOxide.open("report.pdf")
{:ok, n} = PdfOxide.page_count(doc)
pages =
0..(n - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end,
max_concurrency: 8, ordered: true)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
En el CPython estándar el GIL sigue serializando el trabajo a nivel Python, pero la propia extracción libera el GIL durante la ejecución en Rust — por lo que hay paralelismo genuino en el lado de Rust. Con cp314t (Python 3.14+ free-threaded), el GIL es opcional y los bindings declaran gil_used = false, por lo que no hay serialización implícita en absoluto.
Con asyncio:
Python
import asyncio
from pdf_oxide import PdfDocument
doc = PdfDocument("report.pdf")
async def main():
pages = await asyncio.gather(
*[asyncio.to_thread(doc.extract_text, i) for i in range(doc.page_count())]
)
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
ExecutorService pool = Executors.newWorkStealingPool();
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
var futures = IntStream.range(0, doc.pageCount())
.mapToObj(i -> CompletableFuture.supplyAsync(() -> doc.extractText(i), pool))
.toList();
CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join();
var pages = futures.stream().map(CompletableFuture::join).toList();
}
Kotlin
import fyi.oxide.pdf.PdfDocument
import kotlinx.coroutines.*
suspend fun extractAll(doc: PdfDocument): List<String> = coroutineScope {
(0 until doc.pageCount())
.map { i -> async(Dispatchers.IO) { doc.extractText(i) } }
.awaitAll()
}
Scala
import fyi.oxide.pdf.PdfDocument
import scala.concurrent.*
import ExecutionContext.Implicits.global
def extractAll(doc: PdfDocument): Future[Seq[String]] =
Future.traverse(0 until doc.pageCount())(i => Future(doc.extractText(i)))
Swift
import PdfOxide
func extractAll(_ doc: Document) async throws -> [String] {
let n = try doc.pageCount()
return try await withThrowingTaskGroup(of: (Int, String).self) { group in
for i in 0..<n { group.addTask { (i, try doc.extractText(i)) } }
var out = [String](repeating: "", count: n)
for try await (i, text) in group { out[i] = text }
return out
}
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
Future<List<String>> extractAll(PdfDocument doc) async => Future.wait([
for (var i = 0; i < doc.pageCount; i++) Future(() => doc.extractText(i)),
]);
Elixir
def extract_all(doc) do
{:ok, n} = PdfOxide.page_count(doc)
0..(n - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end, ordered: true)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
end
O usa el AsyncPdfDocument ya preparado de la guía de async.
Go
Las lecturas sobre *PdfDocument están protegidas por un sync.RWMutex interno — goroutine-safe por diseño.
Go
package main
import (
"sync"
pdfoxide "github.com/yfedoseev/pdf_oxide/go"
)
func main() {
doc, _ := pdfoxide.Open("report.pdf")
defer doc.Close()
count, _ := doc.PageCount()
results := make([]string, count)
var wg sync.WaitGroup
for i := 0; i < count; i++ {
wg.Add(1)
go func(page int) {
defer wg.Done()
text, _ := doc.ExtractText(page)
results[page] = text
}(i)
}
wg.Wait()
}
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
int count = doc.pageCount();
String[] results = new String[count];
ExecutorService pool = Executors.newFixedThreadPool(8);
var latch = new CountDownLatch(count);
for (int i = 0; i < count; i++) {
final int page = i;
pool.submit(() -> { results[page] = doc.extractText(page); latch.countDown(); });
}
latch.await();
pool.shutdown();
}
Ruby
require 'pdf_oxide'
PdfOxide::PdfDocument.open('report.pdf') do |doc|
count = doc.page_count
results = Array.new(count)
(0...count).map { |i| Thread.new { results[i] = doc.extract_text(i) } }.each(&:join)
end
C++
#include <pdf_oxide/pdf_oxide.hpp>
#include <thread>
#include <vector>
auto doc = pdf_oxide::Document::open("report.pdf");
int count = doc.page_count();
std::vector<std::string> results(count);
std::vector<std::thread> threads;
for (int i = 0; i < count; ++i)
threads.emplace_back([&doc, &results, i] { results[i] = doc.extract_text(i); });
for (auto& t : threads) t.join();
Swift
import PdfOxide
let doc = try Document.open("report.pdf")
let count = try doc.pageCount()
var results = [String](repeating: "", count: count)
try await withThrowingTaskGroup(of: (Int, String).self) { group in
for i in 0..<count { group.addTask { (i, try doc.extractText(i)) } }
for try await (i, t) in group { results[i] = t }
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
final doc = PdfDocument.open('report.pdf');
final count = doc.pageCount;
final results = await Future.wait([
for (var i = 0; i < count; i++) Future(() => doc.extractText(i)),
]);
doc.close();
R
library(pdfoxide)
library(parallel)
doc <- pdf_open("report.pdf")
count <- pdf_page_count(doc)
results <- mclapply(0:(count - 1), function(i) pdf_extract_text(doc, i))
Julia
using PdfOxide
doc = open_document("report.pdf")
count = page_count(doc)
results = Vector{String}(undef, count)
Threads.@threads for i in 0:(count - 1)
results[i + 1] = extract_text(doc, i)
end
Zig
const pdf_oxide = @import("pdf_oxide");
const a = std.heap.page_allocator;
var doc = try pdf_oxide.Document.open("report.pdf");
const count = try doc.pageCount();
var i: usize = 0;
while (i < count) : (i += 1) {
const text = try doc.extractText(a, i); // internally locked reads
defer a.free(text);
}
Objective-C
#import "POXPdfOxide.h"
NSError *err = nil;
POXDocument *doc = [POXDocument openPath:@"report.pdf" error:&err];
NSInteger count = [doc pageCountError:&err];
NSMutableArray *results = [NSMutableArray arrayWithCapacity:count];
for (NSInteger i = 0; i < count; i++) [results addObject:[NSNull null]];
dispatch_apply(count, dispatch_get_global_queue(0, 0), ^(size_t i) {
NSError *e = nil;
results[i] = [doc extractText:i error:&e];
});
Elixir
{:ok, doc} = PdfOxide.open("report.pdf")
{:ok, count} = PdfOxide.page_count(doc)
results =
0..(count - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end, ordered: true)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
*DocumentEditor serializa las escrituras internamente, pero no encadenes en pipeline ediciones independientes desde múltiples goroutines — recopila las mutaciones en una sola goroutine.
C#
C#
using PdfOxide.Core;
using var doc = PdfDocument.Open("report.pdf");
var tasks = Enumerable.Range(0, doc.PageCount)
.Select(i => Task.Run(() => doc.ExtractText(i)));
string[] pages = await Task.WhenAll(tasks);
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
ExecutorService pool = Executors.newWorkStealingPool();
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
var tasks = IntStream.range(0, doc.pageCount())
.mapToObj(i -> CompletableFuture.supplyAsync(() -> doc.extractText(i), pool))
.toList();
var pages = tasks.stream().map(CompletableFuture::join).toList();
}
Kotlin
import fyi.oxide.pdf.PdfDocument
import kotlinx.coroutines.*
PdfDocument.open(java.nio.file.Path.of("report.pdf")).use { doc ->
val pages = runBlocking {
(0 until doc.pageCount())
.map { i -> async(Dispatchers.IO) { doc.extractText(i) } }
.awaitAll()
}
}
Scala
import fyi.oxide.pdf.PdfDocument
import scala.concurrent.*, duration.*
import scala.util.Using
import ExecutionContext.Implicits.global
Using.resource(PdfDocument.open("report.pdf")) { doc =>
val pages = Future.traverse(0 until doc.pageCount())(i => Future(doc.extractText(i)))
Await.result(pages, 60.seconds)
}
Ruby
require 'pdf_oxide'
PdfOxide::PdfDocument.open('report.pdf') do |doc|
pages = (0...doc.page_count).map { |i| Thread.new { doc.extract_text(i) } }.map(&:value)
end
C++
#include <pdf_oxide/pdf_oxide.hpp>
#include <future>
#include <vector>
auto doc = pdf_oxide::Document::open("report.pdf");
std::vector<std::future<std::string>> tasks;
for (int i = 0; i < doc.page_count(); ++i)
tasks.push_back(std::async(std::launch::async, [&doc, i] { return doc.extract_text(i); }));
std::vector<std::string> pages;
for (auto& t : tasks) pages.push_back(t.get());
Swift
import PdfOxide
let doc = try Document.open("report.pdf")
let pages = try await withThrowingTaskGroup(of: (Int, String).self) { group -> [String] in
let n = try doc.pageCount()
for i in 0..<n { group.addTask { (i, try doc.extractText(i)) } }
var out = [String](repeating: "", count: n)
for try await (i, t) in group { out[i] = t }
return out
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
final doc = PdfDocument.open('report.pdf');
final pages = await Future.wait([
for (var i = 0; i < doc.pageCount; i++) Future(() => doc.extractText(i)),
]);
doc.close();
R
library(pdfoxide)
library(parallel)
doc <- pdf_open("report.pdf")
pages <- mclapply(0:(pdf_page_count(doc) - 1), function(i) pdf_extract_text(doc, i))
Julia
using PdfOxide
doc = open_document("report.pdf")
n = page_count(doc)
pages = Vector{String}(undef, n)
Threads.@threads for i in 0:(n - 1)
pages[i + 1] = extract_text(doc, i)
end
Objective-C
#import "POXPdfOxide.h"
NSError *err = nil;
POXDocument *doc = [POXDocument openPath:@"report.pdf" error:&err];
NSInteger n = [doc pageCountError:&err];
NSMutableArray *pages = [NSMutableArray array];
for (NSInteger i = 0; i < n; i++) [pages addObject:[NSNull null]];
dispatch_apply(n, dispatch_get_global_queue(0, 0), ^(size_t i) {
NSError *e = nil;
pages[i] = [doc extractText:i error:&e];
});
Elixir
{:ok, doc} = PdfOxide.open("report.pdf")
{:ok, n} = PdfOxide.page_count(doc)
pages =
0..(n - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end, ordered: true)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
Si necesitas semántica de lector/escritor más precisa en torno a un DocumentEditor:
C#
var locker = new ReaderWriterLockSlim();
locker.EnterReadLock();
try
{
string text = doc.ExtractText(0);
}
finally
{
locker.ExitReadLock();
}
Java
import java.util.concurrent.locks.ReentrantReadWriteLock;
var lock = new ReentrantReadWriteLock();
lock.readLock().lock();
try {
String text = doc.extractText(0);
} finally {
lock.readLock().unlock();
}
Kotlin
import java.util.concurrent.locks.ReentrantReadWriteLock
import kotlin.concurrent.read
val lock = ReentrantReadWriteLock()
val text = lock.read { doc.extractText(0) }
Scala
import java.util.concurrent.locks.ReentrantReadWriteLock
val lock = ReentrantReadWriteLock()
lock.readLock().lock()
val text = try doc.extractText(0) finally lock.readLock().unlock()
Ruby
require 'pdf_oxide'
mutex = Mutex.new
text = mutex.synchronize { doc.extract_text(0) }
C++
#include <shared_mutex>
std::shared_mutex lock;
std::string text;
{
std::shared_lock<std::shared_mutex> guard(lock); // shared (reader) lock
text = doc.extract_text(0);
}
Julia
lock = ReentrantLock()
text = Base.@lock lock extract_text(doc, 0)
Objective-C
pthread_rwlock_t lock;
pthread_rwlock_init(&lock, NULL);
pthread_rwlock_rdlock(&lock);
NSError *e = nil;
NSString *text = [doc extractText:0 error:&e];
pthread_rwlock_unlock(&lock);
Node.js
Un PdfDocument puede pasarse a worker threads transfiriendo el handle subyacente. El patrón más sencillo es dejar que los métodos *Async gestionen el despacho:
Node.js
const { PdfDocument } = require("pdf-oxide");
const doc = new PdfDocument("report.pdf");
try {
const pageCount = doc.getPageCount();
const pages = await Promise.all(
Array.from({ length: pageCount }, (_, i) => doc.extractTextAsync(i))
);
} finally {
doc.close();
}
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
ExecutorService pool = Executors.newWorkStealingPool();
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
int pageCount = doc.pageCount();
var futures = IntStream.range(0, pageCount)
.mapToObj(i -> CompletableFuture.supplyAsync(() -> doc.extractText(i), pool))
.toList();
var pages = futures.stream().map(CompletableFuture::join).toList();
}
Kotlin
import fyi.oxide.pdf.PdfDocument
import kotlinx.coroutines.*
suspend fun pages(doc: PdfDocument): List<String> = coroutineScope {
(0 until doc.pageCount())
.map { i -> async(Dispatchers.IO) { doc.extractText(i) } }
.awaitAll()
}
Scala
import fyi.oxide.pdf.PdfDocument
import scala.concurrent.*
import ExecutionContext.Implicits.global
def pages(doc: PdfDocument): Future[Seq[String]] =
Future.traverse(0 until doc.pageCount())(i => Future(doc.extractText(i)))
Swift
import PdfOxide
func pages(_ doc: Document) async throws -> [String] {
let n = try doc.pageCount()
return try await withThrowingTaskGroup(of: (Int, String).self) { group in
for i in 0..<n { group.addTask { (i, try doc.extractText(i)) } }
var out = [String](repeating: "", count: n)
for try await (i, t) in group { out[i] = t }
return out
}
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
Future<List<String>> pages(PdfDocument doc) async => Future.wait([
for (var i = 0; i < doc.pageCount; i++) Future(() => doc.extractText(i)),
]);
Elixir
def pages(doc) do
{:ok, n} = PdfOxide.page_count(doc)
0..(n - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end, ordered: true)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
end
Cada llamada *Async se ejecuta en el pool de hilos de libuv.
Serialización de escrituras
Las escrituras (DocumentEditor, Pdf, PdfCreator) no son lock-free. Si varios hilos necesitan modificar el mismo documento, canaliza todas las mutaciones a través de una única goroutine/tarea de escritura y distribuye las lecturas.
Un patrón habitual:
- 1 PdfDocument de lectura compartido entre N hilos lectores.
- 1 DocumentEditor de escritura en posesión de una única tarea coordinadora que recoge ediciones de un canal o cola.
Relacionados
- Procesamiento asíncrono — wrappers awaitable y configuración de CancellationToken.
- Procesamiento por lotes — procesamiento concurrente de múltiples archivos.
- Primeros pasos con Node.js — patrones de worker thread.