동시성 — 스레드 안전 PDF 읽기
PdfDocument는 v0.3.22부터 Rust 레벨에서 Send + Sync를 구현합니다. 하나의 문서를 OS 스레드, 고루틴, 워커 스레드, asyncio 태스크에 걸쳐 공유하고 페이지를 병렬로 추출할 수 있습니다. 쓰기 작업은 여전히 직렬화가 필요합니다 — 그것이 바로 DocumentEditor의 역할입니다.
v0.3.22에서 변경된 사항
PdfDocument 내부의 16개 RefCell<T> 래퍼가 모두 Mutex<T>로 교체되었고, Cell<usize>는 AtomicUsize가 되었습니다. 언어 바인딩에서는 Python 클래스(PdfDocument, PdfPage, FormField)에 붙어있던 unsendable 마커가 제거되었습니다. 이 마커는 객체가 스레드 경계를 넘는 순간 RuntimeError를 발생시켰습니다.
최종 결과: 스레드 풀, 비동기 런타임, 프리스레드 Python 모두 이제 별도 설정 없이 동작합니다.
Rust
Rust
use pdf_oxide::PdfDocument;
use std::sync::Arc;
use std::thread;
let doc = Arc::new(PdfDocument::open("report.pdf")?);
let page_count = doc.page_count();
let handles: Vec<_> = (0..page_count)
.map(|i| {
let doc = Arc::clone(&doc);
thread::spawn(move || doc.extract_text(i))
})
.collect();
for h in handles {
let text = h.join().unwrap()?;
println!("{}", text);
}
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
int pageCount = doc.pageCount();
ExecutorService pool = Executors.newFixedThreadPool(8);
List<Future<String>> futures = IntStream.range(0, pageCount)
.mapToObj(i -> pool.submit(() -> doc.extractText(i)))
.collect(Collectors.toList());
for (Future<String> f : futures) System.out.println(f.get());
pool.shutdown();
}
Kotlin
import fyi.oxide.pdf.PdfDocument
import kotlinx.coroutines.*
PdfDocument.open(java.nio.file.Path.of("report.pdf")).use { doc ->
val pages = runBlocking(Dispatchers.IO) {
(0 until doc.pageCount())
.map { i -> async { doc.extractText(i) } }
.awaitAll()
}
}
Scala
import fyi.oxide.pdf.PdfDocument
import scala.concurrent.*
import scala.concurrent.duration.*
import scala.util.Using
import java.util.concurrent.Executors
import ExecutionContext.Implicits.global
Using.resource(PdfDocument.open("report.pdf")) { doc =>
val pages = (0 until doc.pageCount()).map(i => Future(doc.extractText(i)))
Await.result(Future.sequence(pages), 60.seconds)
}
Clojure
(require '[pdf-oxide.core :as pdf])
(with-open [doc (pdf/open "report.pdf")]
(->> (range (pdf/page-count doc))
(map (fn [i] (future (pdf/extract-text doc i))))
(doall)
(map deref)))
Ruby
require 'pdf_oxide'
PdfOxide::PdfDocument.open('report.pdf') do |doc|
pages = (0...doc.page_count).map do |i|
Thread.new { doc.extract_text(i) }
end.map(&:value)
end
PHP
use PdfOxide\PdfDocument;
// PHP는 공유 메모리 스레드를 지원하지 않으므로 순차적으로 처리합니다
// (읽기는 내부적으로 잠금 처리되어 pthreads/parallel 확장에서도 안전합니다).
$doc = PdfDocument::open('report.pdf');
$pages = [];
for ($i = 0; $i < $doc->pageCount(); $i++) {
$pages[$i] = $doc->extractText($i);
}
$doc->close();
C++
#include <pdf_oxide/pdf_oxide.hpp>
#include <future>
#include <iostream>  // std::cout
#include <string>    // std::string
#include <vector>

// Open the document once; every task below borrows it by reference, so `doc`
// must outlive all of the futures.
auto doc = pdf_oxide::Document::open("report.pdf");
int page_count = doc.page_count();

// Launch one asynchronous extraction per page.
std::vector<std::future<std::string>> futures;
for (int i = 0; i < page_count; ++i)
    futures.push_back(std::async(std::launch::async,
        [&doc, i] { return doc.extract_text(i); }));

// get() blocks until the page is ready and rethrows anything the task threw.
// Iterating the vector in order keeps the output in page order.
for (auto& f : futures) std::cout << f.get();
Swift
import PdfOxide
let doc = try Document.open("report.pdf")
let pageCount = try doc.pageCount()
try await withThrowingTaskGroup(of: String.self) { group in
for i in 0..<pageCount {
group.addTask { try doc.extractText(i) }
}
for try await text in group { print(text) }
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
final doc = PdfDocument.open('report.pdf');
final pages = [
for (var i = 0; i < doc.pageCount; i++) doc.extractText(i),
];
doc.close();
R
library(pdfoxide)
library(parallel)
doc <- pdf_open("report.pdf")
n <- pdf_page_count(doc)
pages <- mclapply(0:(n - 1), function(i) pdf_extract_text(doc, i))
Julia
using PdfOxide
doc = open_document("report.pdf")
n = page_count(doc)
pages = Vector{String}(undef, n)
Threads.@threads for i in 0:(n - 1)
pages[i + 1] = extract_text(doc, i)
end
Zig
const pdf_oxide = @import("pdf_oxide");
const a = std.heap.page_allocator;
var doc = try pdf_oxide.Document.open("report.pdf");
const n = try doc.pageCount();
var i: usize = 0;
while (i < n) : (i += 1) {
const text = try doc.extractText(a, i); // reads are internally locked
defer a.free(text);
}
Objective-C
#import "POXPdfOxide.h"
NSError *err = nil;
POXDocument *doc = [POXDocument openPath:@"report.pdf" error:&err];
NSInteger n = [doc pageCountError:&err];
dispatch_apply(n, dispatch_get_global_queue(0, 0), ^(size_t i) {
NSError *e = nil;
NSString *text = [doc extractText:i error:&e];
});
Elixir
{:ok, doc} = PdfOxide.open("report.pdf")
{:ok, n} = PdfOxide.page_count(doc)
pages =
0..(n - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
tokio를 사용하는 경우:
Rust
use std::sync::Arc;
use tokio::task;
let doc = Arc::new(pdf_oxide::PdfDocument::open("report.pdf")?);
let tasks: Vec<_> = (0..doc.page_count())
.map(|i| {
let doc = Arc::clone(&doc);
task::spawn_blocking(move || doc.extract_text(i))
})
.collect();
for t in tasks {
let text = t.await??;
}
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
ExecutorService pool = Executors.newWorkStealingPool();
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
CompletableFuture<?>[] tasks = IntStream.range(0, doc.pageCount())
.mapToObj(i -> CompletableFuture.supplyAsync(() -> doc.extractText(i), pool))
.toArray(CompletableFuture[]::new);
CompletableFuture.allOf(tasks).join();
}
Kotlin
import fyi.oxide.pdf.PdfDocument
import kotlinx.coroutines.*
suspend fun extractAll(doc: PdfDocument): List<String> = coroutineScope {
(0 until doc.pageCount())
.map { i -> async(Dispatchers.IO) { doc.extractText(i) } }
.awaitAll()
}
Scala
import fyi.oxide.pdf.PdfDocument
import scala.concurrent.*
import ExecutionContext.Implicits.global
def extractAll(doc: PdfDocument): Future[Seq[String]] =
Future.traverse(0 until doc.pageCount())(i => Future(doc.extractText(i)))
Swift
import PdfOxide
func extractAll(_ doc: Document) async throws -> [String] {
let n = try doc.pageCount()
return try await withThrowingTaskGroup(of: (Int, String).self) { group in
for i in 0..<n { group.addTask { (i, try doc.extractText(i)) } }
var out = [String](repeating: "", count: n)
for try await (i, text) in group { out[i] = text }
return out
}
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
Future<List<String>> extractAll(PdfDocument doc) async {
return Future.wait([
for (var i = 0; i < doc.pageCount; i++) Future(() => doc.extractText(i)),
]);
}
Elixir
def extract_all(doc) do
{:ok, n} = PdfOxide.page_count(doc)
0..(n - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end, ordered: true)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
end
Python
Python
from concurrent.futures import ThreadPoolExecutor
from pdf_oxide import PdfDocument
doc = PdfDocument("report.pdf")
with ThreadPoolExecutor(max_workers=8) as pool:
pages = list(pool.map(doc.extract_text, range(doc.page_count())))
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
ExecutorService pool = Executors.newFixedThreadPool(8);
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
List<String> pages = IntStream.range(0, doc.pageCount())
.mapToObj(i -> pool.submit(() -> doc.extractText(i)))
.collect(Collectors.toList())
.stream().map(f -> { try { return f.get(); } catch (Exception e) { throw new RuntimeException(e); } })
.collect(Collectors.toList());
}
pool.shutdown();
Ruby
require 'pdf_oxide'
PdfOxide::PdfDocument.open('report.pdf') do |doc|
pages = (0...doc.page_count)
.map { |i| Thread.new { doc.extract_text(i) } }
.map(&:value)
end
C++
#include <pdf_oxide/pdf_oxide.hpp>
#include <future>
#include <vector>
auto doc = pdf_oxide::Document::open("report.pdf");
std::vector<std::future<std::string>> futures;
for (int i = 0; i < doc.page_count(); ++i)
futures.push_back(std::async(std::launch::async,
[&doc, i] { return doc.extract_text(i); }));
std::vector<std::string> pages;
for (auto& f : futures) pages.push_back(f.get());
Swift
import PdfOxide
let doc = try Document.open("report.pdf")
let pages = try await withThrowingTaskGroup(of: (Int, String).self) { group -> [String] in
let n = try doc.pageCount()
for i in 0..<n { group.addTask { (i, try doc.extractText(i)) } }
var out = [String](repeating: "", count: n)
for try await (i, t) in group { out[i] = t }
return out
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
final doc = PdfDocument.open('report.pdf');
final pages = await Future.wait([
for (var i = 0; i < doc.pageCount; i++) Future(() => doc.extractText(i)),
]);
doc.close();
R
library(pdfoxide)
library(parallel)
doc <- pdf_open("report.pdf")
n <- pdf_page_count(doc)
pages <- mclapply(0:(n - 1), function(i) pdf_extract_text(doc, i), mc.cores = 8)
Julia
using PdfOxide
doc = open_document("report.pdf")
n = page_count(doc)
pages = Vector{String}(undef, n)
Threads.@threads for i in 0:(n - 1)
pages[i + 1] = extract_text(doc, i)
end
Elixir
{:ok, doc} = PdfOxide.open("report.pdf")
{:ok, n} = PdfOxide.page_count(doc)
pages =
0..(n - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end,
max_concurrency: 8, ordered: true)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
일반 CPython에서는 GIL이 Python 레벨의 작업을 직렬화하지만, 추출 자체는 Rust 실행 중 GIL을 해제하므로 Rust 쪽에서는 진정한 병렬 처리가 이루어집니다. cp314t(프리스레드 Python 3.14+)에서는 GIL이 선택적이고 바인딩이 gil_used = false를 선언하므로 암묵적인 직렬화가 전혀 없습니다.
asyncio를 사용하는 경우:
Python
import asyncio
from pdf_oxide import PdfDocument
doc = PdfDocument("report.pdf")
async def main():
    """Extract every page of the module-level ``doc`` concurrently.

    Each page is handed to ``asyncio.to_thread`` so the blocking
    ``extract_text`` call runs on a worker thread rather than on the
    event loop.

    Returns:
        list: The extracted page texts, in page order (``asyncio.gather``
        returns results in the order of the awaitables it was given).
    """
    pages = await asyncio.gather(
        *[asyncio.to_thread(doc.extract_text, i) for i in range(doc.page_count())]
    )
    # Hand the result back to the caller; it used to be bound to a local and dropped.
    return pages
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
ExecutorService pool = Executors.newWorkStealingPool();
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
var futures = IntStream.range(0, doc.pageCount())
.mapToObj(i -> CompletableFuture.supplyAsync(() -> doc.extractText(i), pool))
.toList();
CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join();
var pages = futures.stream().map(CompletableFuture::join).toList();
}
Kotlin
import fyi.oxide.pdf.PdfDocument
import kotlinx.coroutines.*
suspend fun extractAll(doc: PdfDocument): List<String> = coroutineScope {
(0 until doc.pageCount())
.map { i -> async(Dispatchers.IO) { doc.extractText(i) } }
.awaitAll()
}
Scala
import fyi.oxide.pdf.PdfDocument
import scala.concurrent.*
import ExecutionContext.Implicits.global
def extractAll(doc: PdfDocument): Future[Seq[String]] =
Future.traverse(0 until doc.pageCount())(i => Future(doc.extractText(i)))
Swift
import PdfOxide
func extractAll(_ doc: Document) async throws -> [String] {
let n = try doc.pageCount()
return try await withThrowingTaskGroup(of: (Int, String).self) { group in
for i in 0..<n { group.addTask { (i, try doc.extractText(i)) } }
var out = [String](repeating: "", count: n)
for try await (i, text) in group { out[i] = text }
return out
}
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
Future<List<String>> extractAll(PdfDocument doc) async => Future.wait([
for (var i = 0; i < doc.pageCount; i++) Future(() => doc.extractText(i)),
]);
Elixir
def extract_all(doc) do
{:ok, n} = PdfOxide.page_count(doc)
0..(n - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end, ordered: true)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
end
또는 비동기 가이드에서 제공하는 AsyncPdfDocument를 바로 사용하세요.
Go
*PdfDocument에 대한 읽기는 내부 sync.RWMutex로 보호되므로, 설계상 여러 고루틴에서 동시에 호출해도 안전합니다.
Go
package main
import (
"sync"
pdfoxide "github.com/yfedoseev/pdf_oxide/go"
)
// main opens report.pdf, extracts every page in its own goroutine, and stores
// the text of page i in results[i]. Errors are surfaced instead of being
// discarded with the blank identifier.
func main() {
	doc, err := pdfoxide.Open("report.pdf")
	if err != nil {
		// Stop before the deferred Close below can run on an unusable document.
		panic(err)
	}
	defer doc.Close()

	count, err := doc.PageCount()
	if err != nil {
		panic(err)
	}

	results := make([]string, count)
	errs := make([]error, count)

	var wg sync.WaitGroup
	for i := 0; i < count; i++ {
		wg.Add(1)
		go func(page int) {
			defer wg.Done()
			// Each goroutine writes only its own slots, so the slices need no lock.
			results[page], errs[page] = doc.ExtractText(page)
		}(i)
	}
	wg.Wait()

	// Report the first page that failed rather than leaving an empty string behind.
	for _, err := range errs {
		if err != nil {
			panic(err)
		}
	}
}
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
// Extracts every page on a fixed pool and waits on a latch until all pages are done.
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
    int count = doc.pageCount();
    String[] results = new String[count];
    ExecutorService pool = Executors.newFixedThreadPool(8);
    try {
        var latch = new CountDownLatch(count);
        for (int i = 0; i < count; i++) {
            final int page = i;
            pool.submit(() -> {
                try {
                    results[page] = doc.extractText(page);
                } finally {
                    // Always count down: if extractText throws, results[page] stays
                    // null, but await() below must still be released or it hangs.
                    latch.countDown();
                }
            });
        }
        // Returns only after every task has finished, so the worker threads'
        // writes to results are visible here.
        latch.await();
    } finally {
        // Release the worker threads even when something above throws.
        pool.shutdown();
    }
}
Ruby
require 'pdf_oxide'
PdfOxide::PdfDocument.open('report.pdf') do |doc|
count = doc.page_count
results = Array.new(count)
(0...count).map { |i| Thread.new { results[i] = doc.extract_text(i) } }.each(&:join)
end
C++
#include <pdf_oxide/pdf_oxide.hpp>
#include <thread>
#include <vector>
auto doc = pdf_oxide::Document::open("report.pdf");
int count = doc.page_count();
std::vector<std::string> results(count);
std::vector<std::thread> threads;
for (int i = 0; i < count; ++i)
threads.emplace_back([&doc, &results, i] { results[i] = doc.extract_text(i); });
for (auto& t : threads) t.join();
Swift
import PdfOxide
let doc = try Document.open("report.pdf")
let count = try doc.pageCount()
var results = [String](repeating: "", count: count)
try await withThrowingTaskGroup(of: (Int, String).self) { group in
for i in 0..<count { group.addTask { (i, try doc.extractText(i)) } }
for try await (i, t) in group { results[i] = t }
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
final doc = PdfDocument.open('report.pdf');
final count = doc.pageCount;
final results = await Future.wait([
for (var i = 0; i < count; i++) Future(() => doc.extractText(i)),
]);
doc.close();
R
library(pdfoxide)
library(parallel)
doc <- pdf_open("report.pdf")
count <- pdf_page_count(doc)
results <- mclapply(0:(count - 1), function(i) pdf_extract_text(doc, i))
Julia
using PdfOxide
doc = open_document("report.pdf")
count = page_count(doc)
results = Vector{String}(undef, count)
Threads.@threads for i in 0:(count - 1)
results[i + 1] = extract_text(doc, i)
end
Zig
const pdf_oxide = @import("pdf_oxide");
const a = std.heap.page_allocator;
var doc = try pdf_oxide.Document.open("report.pdf");
const count = try doc.pageCount();
var i: usize = 0;
while (i < count) : (i += 1) {
const text = try doc.extractText(a, i); // internally locked reads
defer a.free(text);
}
Objective-C
#import "POXPdfOxide.h"
NSError *err = nil;
POXDocument *doc = [POXDocument openPath:@"report.pdf" error:&err];
NSInteger count = [doc pageCountError:&err];
// Pre-fill with NSNull so every page index is valid before the parallel loop runs.
NSMutableArray *results = [NSMutableArray arrayWithCapacity:count];
for (NSInteger i = 0; i < count; i++) [results addObject:[NSNull null]];
dispatch_apply(count, dispatch_get_global_queue(0, 0), ^(size_t i) {
    NSError *e = nil;
    // The extraction itself runs in parallel, outside the lock.
    NSString *text = [doc extractText:i error:&e];
    // NSMutableArray is not thread-safe, so the stores are serialized.
    // A nil result (presumably a failed extraction) keeps the NSNull
    // placeholder, because assigning nil into an NSMutableArray raises.
    @synchronized (results) {
        if (text != nil) results[i] = text;
    }
});
Elixir
{:ok, doc} = PdfOxide.open("report.pdf")
{:ok, count} = PdfOxide.page_count(doc)
results =
0..(count - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end, ordered: true)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
*DocumentEditor는 내부적으로 쓰기를 직렬화하지만, 여러 고루틴에서 독립적인 편집을 파이프라인 처리하지 마세요 — 변경 사항은 하나의 고루틴에서 수집하세요.
C#
C#
using PdfOxide.Core;
using var doc = PdfDocument.Open("report.pdf");
var tasks = Enumerable.Range(0, doc.PageCount)
.Select(i => Task.Run(() => doc.ExtractText(i)));
string[] pages = await Task.WhenAll(tasks);
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
ExecutorService pool = Executors.newWorkStealingPool();
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
var tasks = IntStream.range(0, doc.pageCount())
.mapToObj(i -> CompletableFuture.supplyAsync(() -> doc.extractText(i), pool))
.toList();
var pages = tasks.stream().map(CompletableFuture::join).toList();
}
Kotlin
import fyi.oxide.pdf.PdfDocument
import kotlinx.coroutines.*
PdfDocument.open(java.nio.file.Path.of("report.pdf")).use { doc ->
val pages = runBlocking {
(0 until doc.pageCount())
.map { i -> async(Dispatchers.IO) { doc.extractText(i) } }
.awaitAll()
}
}
Scala
import fyi.oxide.pdf.PdfDocument
import scala.concurrent.*, duration.*
import scala.util.Using
import ExecutionContext.Implicits.global
Using.resource(PdfDocument.open("report.pdf")) { doc =>
val pages = Future.traverse(0 until doc.pageCount())(i => Future(doc.extractText(i)))
Await.result(pages, 60.seconds)
}
Ruby
require 'pdf_oxide'
PdfOxide::PdfDocument.open('report.pdf') do |doc|
pages = (0...doc.page_count).map { |i| Thread.new { doc.extract_text(i) } }.map(&:value)
end
C++
#include <pdf_oxide/pdf_oxide.hpp>
#include <future>
#include <vector>
auto doc = pdf_oxide::Document::open("report.pdf");
std::vector<std::future<std::string>> tasks;
for (int i = 0; i < doc.page_count(); ++i)
tasks.push_back(std::async(std::launch::async, [&doc, i] { return doc.extract_text(i); }));
std::vector<std::string> pages;
for (auto& t : tasks) pages.push_back(t.get());
Swift
import PdfOxide
let doc = try Document.open("report.pdf")
let pages = try await withThrowingTaskGroup(of: (Int, String).self) { group -> [String] in
let n = try doc.pageCount()
for i in 0..<n { group.addTask { (i, try doc.extractText(i)) } }
var out = [String](repeating: "", count: n)
for try await (i, t) in group { out[i] = t }
return out
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
final doc = PdfDocument.open('report.pdf');
final pages = await Future.wait([
for (var i = 0; i < doc.pageCount; i++) Future(() => doc.extractText(i)),
]);
doc.close();
R
library(pdfoxide)
library(parallel)
doc <- pdf_open("report.pdf")
pages <- mclapply(0:(pdf_page_count(doc) - 1), function(i) pdf_extract_text(doc, i))
Julia
using PdfOxide
doc = open_document("report.pdf")
n = page_count(doc)
pages = Vector{String}(undef, n)
Threads.@threads for i in 0:(n - 1)
pages[i + 1] = extract_text(doc, i)
end
Objective-C
#import "POXPdfOxide.h"
NSError *err = nil;
POXDocument *doc = [POXDocument openPath:@"report.pdf" error:&err];
NSInteger n = [doc pageCountError:&err];
// Pre-fill with NSNull so every page index is valid before the parallel loop runs.
NSMutableArray *pages = [NSMutableArray array];
for (NSInteger i = 0; i < n; i++) [pages addObject:[NSNull null]];
dispatch_apply(n, dispatch_get_global_queue(0, 0), ^(size_t i) {
    NSError *e = nil;
    // The extraction itself runs in parallel, outside the lock.
    NSString *text = [doc extractText:i error:&e];
    // NSMutableArray is not thread-safe, so the stores are serialized.
    // A nil result (presumably a failed extraction) keeps the NSNull
    // placeholder, because assigning nil into an NSMutableArray raises.
    @synchronized (pages) {
        if (text != nil) pages[i] = text;
    }
});
Elixir
{:ok, doc} = PdfOxide.open("report.pdf")
{:ok, n} = PdfOxide.page_count(doc)
pages =
0..(n - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end, ordered: true)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
DocumentEditor 주변에서 세밀한 읽기/쓰기 시맨틱이 필요한 경우:
C#
var locker = new ReaderWriterLockSlim();
locker.EnterReadLock();
try
{
string text = doc.ExtractText(0);
}
finally
{
locker.ExitReadLock();
}
Java
import java.util.concurrent.locks.ReentrantReadWriteLock;
var lock = new ReentrantReadWriteLock();
lock.readLock().lock();
try {
String text = doc.extractText(0);
} finally {
lock.readLock().unlock();
}
Kotlin
import java.util.concurrent.locks.ReentrantReadWriteLock
import kotlin.concurrent.read
val lock = ReentrantReadWriteLock()
val text = lock.read { doc.extractText(0) }
Scala
import java.util.concurrent.locks.ReentrantReadWriteLock
val lock = ReentrantReadWriteLock()
lock.readLock().lock()
val text = try doc.extractText(0) finally lock.readLock().unlock()
Ruby
require 'pdf_oxide'
mutex = Mutex.new
text = mutex.synchronize { doc.extract_text(0) }
C++
#include <shared_mutex>
std::shared_mutex lock;
std::string text;
{
std::shared_lock<std::shared_mutex> guard(lock); // shared (reader) lock
text = doc.extract_text(0);
}
Julia
lock = ReentrantLock()
text = Base.@lock lock extract_text(doc, 0)
Objective-C
pthread_rwlock_t lock;
pthread_rwlock_init(&lock, NULL);
pthread_rwlock_rdlock(&lock);
NSError *e = nil;
NSString *text = [doc extractText:0 error:&e];
pthread_rwlock_unlock(&lock);
Node.js
PdfDocument는 백킹 핸들을 전달해 워커 스레드로 넘길 수 있습니다. 더 간단한 방법은 *Async 메서드가 디스패치를 처리하도록 하는 것입니다:
Node.js
const { PdfDocument } = require("pdf-oxide");
const doc = new PdfDocument("report.pdf");
try {
const pageCount = doc.getPageCount();
const pages = await Promise.all(
Array.from({ length: pageCount }, (_, i) => doc.extractTextAsync(i))
);
} finally {
doc.close();
}
Java
import fyi.oxide.pdf.PdfDocument;
import java.util.concurrent.*;
import java.util.stream.*;
ExecutorService pool = Executors.newWorkStealingPool();
try (PdfDocument doc = PdfDocument.open(java.nio.file.Path.of("report.pdf"))) {
int pageCount = doc.pageCount();
var futures = IntStream.range(0, pageCount)
.mapToObj(i -> CompletableFuture.supplyAsync(() -> doc.extractText(i), pool))
.toList();
var pages = futures.stream().map(CompletableFuture::join).toList();
}
Kotlin
import fyi.oxide.pdf.PdfDocument
import kotlinx.coroutines.*
suspend fun pages(doc: PdfDocument): List<String> = coroutineScope {
(0 until doc.pageCount())
.map { i -> async(Dispatchers.IO) { doc.extractText(i) } }
.awaitAll()
}
Scala
import fyi.oxide.pdf.PdfDocument
import scala.concurrent.*
import ExecutionContext.Implicits.global
def pages(doc: PdfDocument): Future[Seq[String]] =
Future.traverse(0 until doc.pageCount())(i => Future(doc.extractText(i)))
Swift
import PdfOxide
func pages(_ doc: Document) async throws -> [String] {
let n = try doc.pageCount()
return try await withThrowingTaskGroup(of: (Int, String).self) { group in
for i in 0..<n { group.addTask { (i, try doc.extractText(i)) } }
var out = [String](repeating: "", count: n)
for try await (i, t) in group { out[i] = t }
return out
}
}
Dart
import 'package:pdf_oxide/pdf_oxide.dart';
Future<List<String>> pages(PdfDocument doc) async => Future.wait([
for (var i = 0; i < doc.pageCount; i++) Future(() => doc.extractText(i)),
]);
Elixir
def pages(doc) do
{:ok, n} = PdfOxide.page_count(doc)
0..(n - 1)
|> Task.async_stream(fn i -> PdfOxide.extract_text(doc, i) end, ordered: true)
|> Enum.map(fn {:ok, {:ok, text}} -> text end)
end
각 *Async 호출은 libuv 스레드 풀에서 실행됩니다.
쓰기 직렬화
쓰기 작업(DocumentEditor, Pdf, PdfCreator)은 락 프리가 아닙니다. 여러 스레드가 같은 문서를 수정해야 한다면, 모든 변경을 하나의 writer 고루틴/태스크로 집중시키고 읽기를 분산시키세요.
일반적인 패턴:
- 읽기용 PdfDocument 1개를 N개의 리더 스레드가 공유합니다.
- DocumentEditor 1개를 단일 코디네이터 태스크가 소유하며, 채널이나 큐에서 편집을 수집합니다.
관련 문서
- 비동기 처리 — awaitable 래퍼와 CancellationToken 설정.
- 배치 처리 — 여러 파일의 동시 처리.
- Node.js 시작하기 — 워커 스레드 패턴.