Skip to main content
The DocumentConverterResult class represents the result of converting a document to Markdown. It contains the converted Markdown text and optional metadata.

Constructor

DocumentConverterResult(
    markdown: str,
    *,
    title: Optional[str] = None
)
Create a new conversion result.
markdown
str
required
The converted Markdown text.
title
str | None
Optional title of the document. Extracted from the document when available.

Example

from markitdown import DocumentConverterResult

result = DocumentConverterResult(
    markdown="# Hello World\n\nThis is the content.",
    title="Hello World"
)

Properties

markdown

markdown: str
The converted Markdown text.
markdown
str
The full Markdown content generated from the document.

Example

from markitdown import MarkItDown

md = MarkItDown()
result = md.convert("document.pdf")

print(result.markdown)
# Output: # Document Title
#
# Document content...

title

title: Optional[str]
The optional title of the document.
title
str | None
Document title extracted during conversion, or None if no title was found.

Example

result = md.convert("report.docx")

if result.title:
    print(f"Document title: {result.title}")
else:
    print("No title found")

text_content (deprecated)

text_content: str
text_content is soft-deprecated. Use markdown or str(result) instead.
Alias for the markdown property. Provided for backward compatibility.
text_content
str
Same as markdown. New code should use markdown directly.

Methods

__str__()

def __str__(self) -> str
Returns the converted Markdown text. Allows the result object to be used as a string.
markdown
str
The Markdown content.

Example

result = md.convert("document.pdf")

# These are all equivalent:
print(result.markdown)
print(result.text_content)  # deprecated
print(str(result))
print(result)  # Uses __str__

Usage Examples

Basic Conversion

from markitdown import MarkItDown

md = MarkItDown()
result = md.convert("document.docx")

print(f"Title: {result.title}")
print(f"Length: {len(result.markdown)} characters")
print("\nContent:")
print(result.markdown)

Saving to File

result = md.convert("presentation.pptx")

with open("output.md", "w", encoding="utf-8") as f:
    f.write(result.markdown)

print(f"Saved {len(result.markdown)} characters to output.md")

Processing Multiple Files

import os
from pathlib import Path

md = MarkItDown()
output_dir = Path("markdown_output")
output_dir.mkdir(exist_ok=True)

for file in Path("documents").glob("*.pdf"):
    result = md.convert(file)

    # Default to the source file's own name with a .md extension.
    output_name = file.with_suffix(".md").name

    # Prefer the extracted title when there is one. Titles are free-form text
    # taken from the document, so anything that is not clearly filename-safe
    # (path separators, colons, control characters, ...) is replaced to avoid
    # invalid paths or writing outside output_dir.
    if result.title:
        safe_title = "".join(
            ch if ch.isalnum() or ch in " ._-" else "_"
            for ch in result.title
        ).strip(" .")
        # A title made only of dots/spaces (e.g. "..") sanitizes to nothing;
        # keep the file-name fallback in that case.
        if safe_title:
            output_name = f"{safe_title}.md"

    output_path = output_dir / output_name
    output_path.write_text(result.markdown, encoding="utf-8")

    print(f"Converted {file} -> {output_path}")

Extracting Metadata

def analyze_document(file_path):
    """Convert a document to Markdown and summarize its basic structure.

    Args:
        file_path: Location of the document; passed unchanged to
            ``MarkItDown.convert``.

    Returns:
        A dict with the keys ``'title'`` (the result's title, possibly
        ``None``), ``'length'`` (characters of Markdown), ``'lines'``
        (number of lines), ``'headings'`` (ATX headings found outside
        fenced code blocks) and ``'code_blocks'`` (fenced code blocks).
    """
    md = MarkItDown()
    result = md.convert(file_path)

    lines = result.markdown.split('\n')
    headings = 0
    code_blocks = 0
    in_code_block = False

    for line in lines:
        if line.lstrip().startswith('```'):
            # A fence line either opens a new block or closes the current
            # one; counting on open means an unclosed block is still counted.
            if not in_code_block:
                code_blocks += 1
            in_code_block = not in_code_block
            continue

        if in_code_block:
            # '#' inside code is a comment or shebang, not a heading.
            continue

        # ATX heading: 1-6 '#' characters followed by whitespace or nothing.
        # This rejects things like "#hashtag" and "#######".
        hashes = len(line) - len(line.lstrip('#'))
        rest = line[hashes:]
        if 1 <= hashes <= 6 and (not rest or rest[0].isspace()):
            headings += 1

    return {
        'title': result.title,
        'length': len(result.markdown),
        'lines': len(lines),
        'headings': headings,
        'code_blocks': code_blocks
    }

metadata = analyze_document("report.docx")
print(f"Document: {metadata['title']}")
print(f"Length: {metadata['length']} characters")
print(f"Headings: {metadata['headings']}")
print(f"Code blocks: {metadata['code_blocks']}")

Custom Converter Implementation

from markitdown import DocumentConverter, DocumentConverterResult
from typing import BinaryIO, Any

class CustomConverter(DocumentConverter):
    """Example converter for a plain-text ``.custom`` format.

    The first line of the file is treated as the document title and the
    remaining lines as the body.
    """

    def accepts(self, file_stream: BinaryIO, stream_info, **kwargs: Any) -> bool:
        """Return True when the stream's extension is ``.custom`` (any case)."""
        # The extension may be missing; treat that as "no match" rather than
        # failing on None, and compare case-insensitively.
        return (stream_info.extension or "").lower() == ".custom"

    def convert(self, file_stream: BinaryIO, stream_info, **kwargs: Any) -> DocumentConverterResult:
        """Decode the stream as UTF-8 and build the Markdown result.

        Returns:
            A DocumentConverterResult whose ``title`` is the stripped first
            line (``None`` when that line is empty) and whose ``markdown``
            is the body, preceded by a level-1 heading when a title exists.
        """
        content = file_stream.read().decode('utf-8')

        # Parse custom format: first line is the title, the rest is the body.
        first_line, _, body = content.partition('\n')

        # Strip so CRLF input does not leave '\r' in the title, and map an
        # empty first line to None instead of emitting an empty "# " heading.
        title = first_line.strip() or None

        # Generate Markdown
        markdown = f"# {title}\n\n{body}" if title else body

        # Return result with metadata
        return DocumentConverterResult(
            markdown=markdown,
            title=title
        )

Return Value Processing

String Operations

result = md.convert("file.pdf")

# String methods work directly
if result.markdown.startswith("# "):
    print("Document has a title heading")

# Search for patterns
import re
emails = re.findall(r'\b[\w.-]+@[\w.-]+\.\w+\b', result.markdown)
print(f"Found {len(emails)} email addresses")

# Count words
words = len(result.markdown.split())
print(f"Word count: {words}")

Markdown Processing

import markdown
from bs4 import BeautifulSoup

result = md.convert("document.docx")

# Convert Markdown to HTML
html = markdown.markdown(result.markdown)

# Extract plain text
soup = BeautifulSoup(html, 'html.parser')
plain_text = soup.get_text()

print(f"Plain text length: {len(plain_text)} characters")

See Also