The DocumentConverterResult class represents the result of converting a document to Markdown. It contains the converted Markdown text and optional metadata.
Constructor
DocumentConverterResult(
markdown: str,
*,
title: Optional[str] = None
)
Create a new conversion result.
markdown (str, required): The converted Markdown text.
title (Optional[str], keyword-only, default None): Optional title of the document. Extracted from the document when available.
Example
from markitdown import DocumentConverterResult

# Build a result by hand; title is passed by keyword.
hello_markdown = "# Hello World\n\nThis is the content."
result = DocumentConverterResult(markdown=hello_markdown, title="Hello World")
Properties
markdown
The converted Markdown text.
The full Markdown content generated from the document.
Example
from markitdown import MarkItDown

# Convert a PDF, then read the generated Markdown from the result object.
md = MarkItDown()
result = md.convert("document.pdf")
markdown_text = result.markdown
print(markdown_text)
# Output: # Document Title
#
# Document content...
title
The optional title of the document.
Document title extracted during conversion, or None if no title was found.
Example
result = md.convert("report.docx")

# title is None when no title was found, so test it before formatting.
title_line = f"Document title: {result.title}" if result.title else "No title found"
print(title_line)
text_content (deprecated)
text_content is soft-deprecated. Use markdown or str(result) instead.
Alias for the markdown property. Provided for backward compatibility.
Same as markdown. New code should use markdown directly.
Methods
__str__()
Returns the converted Markdown text. Allows the result object to be used as a string.
Example
result = md.convert("document.pdf")
# These are all equivalent -- each prints the converted Markdown text:
print(result.markdown)
print(result.text_content) # deprecated alias; prefer result.markdown
print(str(result))
print(result) # print() calls __str__ implicitly
Usage Examples
Basic Conversion
# Convert a Word document, print a short summary, then print the full Markdown.
from markitdown import MarkItDown
md = MarkItDown()
result = md.convert("document.docx")
# title may be None when no title was found; it is then printed as "None".
print(f"Title: {result.title}")
print(f"Length: {len(result.markdown)} characters")
print("\nContent:")
print(result.markdown)
Saving to File
result = md.convert("presentation.pptx")
markdown_text = result.markdown

# Write as UTF-8 explicitly so the output does not depend on the platform's
# default encoding.
with open("output.md", "w", encoding="utf-8") as out_file:
    out_file.write(markdown_text)

print(f"Saved {len(result.markdown)} characters to output.md")
Processing Multiple Files
import os
from pathlib import Path
md = MarkItDown()
output_dir = Path("markdown_output")
output_dir.mkdir(exist_ok=True)

for file in Path("documents").glob("*.pdf"):
    result = md.convert(file)

    # Prefer the document title as the file name, but a title is free-form
    # text: replace every character that is not a letter, digit, space, '-'
    # or '_' so that separators such as '/' or ':' (or a title like "..")
    # cannot produce an invalid name or a path outside output_dir.
    safe_title = "".join(
        ch if ch.isalnum() or ch in " -_" else "_"
        for ch in (result.title or "")
    ).strip()

    # Fall back to the source file's own name when there is no usable title.
    if safe_title:
        output_name = f"{safe_title}.md"
    else:
        output_name = file.with_suffix(".md").name

    # NOTE: two documents with the same title map to the same output file;
    # the later one overwrites the earlier one.
    output_path = output_dir / output_name
    output_path.write_text(result.markdown, encoding="utf-8")
    print(f"Converted {file} -> {output_path}")
def analyze_document(file_path):
    """Convert a document and return simple statistics about its Markdown.

    Args:
        file_path: Path of the document, passed unchanged to
            ``MarkItDown.convert``.

    Returns:
        A dict with the keys ``'title'`` (the result's title, possibly None),
        ``'length'`` (number of characters), ``'lines'`` (number of lines),
        ``'headings'`` (lines starting with ``#`` outside fenced code blocks)
        and ``'code_blocks'`` (number of ``` fence pairs).
    """
    md = MarkItDown()
    result = md.convert(file_path)

    lines = result.markdown.split('\n')

    # Count heading lines, but skip anything inside a ``` fence: a line such
    # as "# install deps" in a shell or Python code block is a comment, not a
    # Markdown heading. An unclosed fence treats the rest of the text as code.
    headings = 0
    in_code_block = False
    for line in lines:
        if line.lstrip().startswith('```'):
            in_code_block = not in_code_block
        elif not in_code_block and line.startswith('#'):
            headings += 1

    code_blocks = result.markdown.count('```')

    return {
        'title': result.title,
        'length': len(result.markdown),
        'lines': len(lines),
        'headings': headings,
        'code_blocks': code_blocks // 2,  # each block has an opening and a closing fence
    }
# Run the analysis on a Word document and report the collected statistics.
metadata = analyze_document("report.docx")
# 'title' may be None when the document has no title.
print(f"Document: {metadata['title']}")
print(f"Length: {metadata['length']} characters")
print(f"Headings: {metadata['headings']}")
print(f"Code blocks: {metadata['code_blocks']}")
Custom Converter Implementation
from markitdown import DocumentConverter, DocumentConverterResult
from typing import BinaryIO, Any
class CustomConverter(DocumentConverter):
    """Example converter for ``.custom`` files.

    The input is treated as UTF-8 text whose first line is the document
    title and whose remaining lines are the body.
    """

    def accepts(self, file_stream: BinaryIO, stream_info, **kwargs: Any) -> bool:
        """Return True when the stream's extension is ``.custom``."""
        return stream_info.extension == ".custom"

    def convert(self, file_stream: BinaryIO, stream_info, **kwargs: Any) -> DocumentConverterResult:
        """Read the stream and convert it to Markdown.

        Returns:
            A DocumentConverterResult whose ``title`` is the stripped first
            line (or None when that line is empty) and whose ``markdown`` is
            the body, preceded by a level-1 heading when a title exists.

        Raises:
            UnicodeDecodeError: If the stream content is not valid UTF-8.
        """
        content = file_stream.read().decode('utf-8')

        # Split off the first line. str.split never returns an empty list, so
        # an "is the list empty" check cannot detect a missing title; test the
        # first line itself instead. strip() also drops a trailing '\r' left
        # by CRLF line endings.
        first_line, _, body = content.partition('\n')
        title = first_line.strip() or None

        # Emit a heading only when there is a title, so empty input does not
        # produce a bare "# " line.
        if title is None:
            markdown = body
        else:
            markdown = f"# {title}\n\n{body}"

        # Return result with metadata
        return DocumentConverterResult(
            markdown=markdown,
            title=title,
        )
Return Value Processing
String Operations
import re

result = md.convert("file.pdf")
text = result.markdown

# result.markdown is a plain str, so every str method is available.
if text.startswith("# "):
    print("Document has a title heading")

# Pull out anything that looks like an email address.
email_pattern = re.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b')
emails = email_pattern.findall(text)
print(f"Found {len(emails)} email addresses")

# Whitespace-separated tokens serve as a simple word count.
words = len(text.split())
print(f"Word count: {words}")
Markdown Processing
import markdown
from bs4 import BeautifulSoup

result = md.convert("document.docx")

# Render the Markdown to HTML, then strip the tags to recover plain text.
html = markdown.markdown(result.markdown)
plain_text = BeautifulSoup(html, 'html.parser').get_text()

print(f"Plain text length: {len(plain_text)} characters")
See Also