+
    #j                        R t ^ RIt^ RItRR ltR t]R8X  d   ]P                  R,          t]'       d   ]^ ,          R9   d   ]! ] 4       ]P                  ! ^ 4       ]^ ,          R8X  d   ]! 4        ]P                  ! ^ 4       ]^ ,          t
RtRtRtR	]9   d"   ]P                  R	4      t]]^,           ,          tR
]9   d   RtR]9   d   Rt]! ]
]]]R7       R# R# )a{  Extract text from documents using marker-pdf. High-quality OCR + layout analysis.

Requires ~3-5GB disk (PyTorch + models downloaded on first use).
Supports: PDF, DOCX, PPTX, XLSX, HTML, EPUB, images.

Usage:
    python extract_marker.py document.pdf
    python extract_marker.py document.pdf --output_dir ./output
    python extract_marker.py presentation.pptx
    python extract_marker.py spreadsheet.xlsx
    python extract_marker.py scanned_doc.pdf           # OCR works here
    python extract_marker.py document.pdf --json        # Structured output
    python extract_marker.py document.pdf --use_llm     # LLM-boosted accuracy
NmarkdownFc                    ^ RI Hp ^ RIHp ^ RIHp / pV'       d   RVR&   V! V4      pV! 4       p	V! VP                  4       V	R7      p
V
! V 4      pVR8X  dO   ^ RIp\        TP                  R	VP                  R
\        VR
4      '       d   VP                  M/ /^RR7      4       M\        VP                  4       V'       d   \        VR4      '       d   VP                  '       d   ^ RIHp V! V4      P!                  RRR7       VP                  P#                  4        FR  w  r\$        P&                  P)                  W4      p\+        VR4      ;_uu_ 4       pVP-                  V4       RRR4       KT  	  \        R\/        VP                  4       RV R2\0        P2                  R7       R# R# R# R#   + '       g   i     K  ; i)    )PdfConverter)create_model_dict)ConfigParserTuse_llm)configartifact_dictjsonNr   metadataF)indentensure_asciiimages)Path)parentsexist_okwbz
Saved z image(s) to /)file)marker.converters.pdfr   marker.modelsr   marker.config.parserr   generate_config_dictr   printdumpsr   hasattrr   r   pathlibr   mkdiritemsospathjoinopenwritelensysstderr)r!   
output_diroutput_formatr   r   r   r   config_dictconfig_parsermodels	converterrenderedr   r   nameimg_dataimg_pathfs   &&&&              v/opt/hermes-venv/lib/python3.14/site-packages/../../../skills/productivity/ocr-and-documents/scripts/extract_marker.pyconvertr4      sw   2/1K!%I -M FM$F$F$HX^_IHdjj))WXz-J-J))PR
 %  ) 	*
 	h  gh11hooo Ztd;&oo335NDww||J5Hh%%! &% 6 	X__-.mJ<qIPSPZPZ[ 7F1z
 &%%s   F33Gc                     ^ RI p V P                  R4      P                  R	,          pV^8  d2   \        RVR R24       \        R4       \        P
                  ! ^4       \        RVR R24       R# )
z#Check disk space before installing.Nr   u   ⚠️  Only z.1fz4GB free. marker-pdf needs ~5GB for PyTorch + models.zGUse pymupdf instead (scripts/extract_pymupdf.py) or free up disk space.u   ✓ u%   GB free — sufficient for marker-pdfi   @)shutil
disk_usagefreer   r&   exit)r6   free_gbs     r3   check_requirementsr;   5   sc    $))W5G{gc]*^_`WX	DB
CD    __main__:   NNz--checkz--output_dirz--jsonr   z	--use_llmT)r(   r)   r   )Nr   F>   -h--help)__doc__r&   r    r4   r;   __name__argvargsr   r9   r!   r(   r)   r   indexidx r<   r3   <module>rH      s     	\DE z88B<D47..gAw)7DJMGjj(#']
4dDZ}gV/ r<   