notes-on-optical-printer-te.../extract/pdf.py

13 lines
256 B
Python

# pip install pytesseract PyMuPDF   (PyMuPDF is the package that provides the `fitz` module)
import fitz
import io
# Path to the source PDF; being relative, it is resolved against the current
# working directory when the script is run.
file = "../original/NOTES_ON_OPTICAL_PRINTER_TECHNIQUE.pdf"

# Open the document in a `with` block so it is closed even if text extraction
# fails part-way through.
with fitz.open(file) as pdf_file:
    for page in pdf_file:
        # get_text() already returns a str, so print it as-is. Round-tripping
        # through encode("utf8") / decode("unicode_escape") decodes the UTF-8
        # bytes as Latin-1 (mangling every non-ASCII character) and also
        # interprets -- or raises on -- any backslash sequence that happens to
        # occur in the page text.
        print(page.get_text())