13 lines
256 B
Python
13 lines
256 B
Python
|
# Setup: pip install pytesseract PyMuPDF  (PyMuPDF provides the "fitz" module imported below)
|
||
|
|
||
|
import fitz
|
||
|
import io
|
||
|
|
||
|
# Path to the PDF whose embedded text is dumped to stdout, page by page.
file = "../original/NOTES_ON_OPTICAL_PRINTER_TECHNIQUE.pdf"

# Open the document as a context manager so it is closed even if text
# extraction or printing fails part-way through.
with fitz.open(file) as pdf_file:
    for page in pdf_file:
        # get_text() already returns a str, so print it unchanged. The earlier
        # encode("utf8") / decode("unicode_escape") round trip re-read the
        # UTF-8 bytes as Latin-1 (garbling every non-ASCII character) and
        # interpreted backslashes in the PDF text as escape sequences, which
        # could raise UnicodeDecodeError.
        text = page.get_text()
        print(text)
|