"""Print the text of every page of a PDF to standard output.

Requires PyMuPDF, which is imported under the name ``fitz``.
"""
# pip install pytesseract PyMuPDF
import fitz
import io

# Path of the PDF to read, relative to the current working directory.
file = "../original/NOTES_ON_OPTICAL_PRINTER_TECHNIQUE.pdf"

# Use the document as a context manager so it is closed even when text
# extraction or printing fails part-way through.
with fitz.open(file) as pdf_file:
    for page in pdf_file:
        # get_text() already returns a ``str``. Print it as-is: encoding it to
        # UTF-8 and decoding with "unicode_escape" would read the bytes as
        # Latin-1 (garbling every non-ASCII character) and would interpret
        # literal backslashes in the document as escape sequences.
        text = page.get_text()
        print(text)