# pip install pytesseract PyMuPDF Pillow opencv-python
"""Render every page of a PDF to a PNG and print the text Tesseract finds.

Each page is rasterised at ``RENDER_DPI`` into ``PAGES_DIR/page-<n>.png``
(``<n>`` is PyMuPDF's ``page.number``), read back with OpenCV and passed to
``pytesseract.image_to_string``; the recognised text is printed to stdout.
"""

import io  # NOTE(review): not used by this script; kept in case other code relies on it
import os

import cv2
import fitz
import pytesseract
from PIL import Image  # NOTE(review): not used by this script; kept in case other code relies on it

# Location of the tesseract binary that pytesseract shells out to.
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

PDF_PATH = "../original/NOTES_ON_OPTICAL_PRINTER_TECHNIQUE.pdf"
PAGES_DIR = "pages"
RENDER_DPI = 300
OCR_LANG = 'eng'
# Tesseract page-segmentation mode 6 and OCR-engine mode 3
# (see the Tesseract command-line documentation for their meaning).
OCR_CONFIG = '--psm 6 --oem 3'


def ocr_page(page):
    """Rasterise *page* to a PNG in ``PAGES_DIR`` and return its OCR text.

    Args:
        page: A PyMuPDF page taken from an open document.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the page.

    Raises:
        RuntimeError: If OpenCV cannot read back the PNG that was just saved.
    """
    pix = page.get_pixmap(dpi=RENDER_DPI)
    file_path = "%s/page-%i.png" % (PAGES_DIR, page.number)
    pix.save(file_path)

    # The PNG is kept on disk as an artefact and re-read through OpenCV so
    # that tesseract receives the same array the original pipeline used.
    image = cv2.imread(file_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise RuntimeError("could not read rendered page image: %s" % file_path)

    return pytesseract.image_to_string(image, lang=OCR_LANG, config=OCR_CONFIG)


def main():
    """OCR every page of ``PDF_PATH`` and print each page's text."""
    # pix.save() does not create missing directories, so make sure it exists.
    os.makedirs(PAGES_DIR, exist_ok=True)

    # The context manager closes the document even if a page fails.
    with fitz.open(PDF_PATH) as pdf_file:
        for page in pdf_file:
            print(ocr_page(page))


if __name__ == "__main__":
    main()