Back to Blog

Download SEC 10-K Filings Programmatically — Python & JavaScript Guide

A step-by-step guide to finding, downloading, and parsing 10-K annual report filings from SEC EDGAR using Python and JavaScript. Works for any filing type.

The Download Pipeline

Downloading a 10-K filing from EDGAR requires four steps:

  1. Look up the CIK — The company's unique SEC identifier (CIK lookup guide)
  2. Get the submissions list — Find the 10-K filing and its accession number
  3. Construct the filing URL — Navigate the EDGAR directory structure
  4. Download the document — Fetch the HTML/text content

Understanding EDGAR's Directory Structure

Every filing on EDGAR follows this URL pattern:

https://www.sec.gov/Archives/edgar/data/{CIK}/{ACCESSION_NO_DASHES}/{FILENAME}

For example, Apple's 2024 10-K filing:

https://www.sec.gov/Archives/edgar/data/320193/000032019324000123/aapl-20240928.htm

The accession number is the key. It uniquely identifies each filing and is formatted as XXXXXXXXXX-YY-ZZZZZZ (10-digit filer ID, 2-digit year, 6-digit sequence number). In URLs under /Archives/, the dashes are removed.

Python Implementation

Step 1: Look Up CIK and Get Filing List

import requests
import time
import os
from html.parser import HTMLParser

HEADERS = {'User-Agent': 'FilingDownloader [email protected]'}

def get_submissions(cik):
    """Fetch the filing submissions JSON for a company from data.sec.gov.

    Args:
        cik: Company CIK as int or str; zero-padded to 10 digits as the
            submissions endpoint requires.

    Returns:
        Parsed JSON dict with company metadata and recent filings.

    Raises:
        requests.HTTPError: On a non-2xx response.
    """
    cik_padded = str(cik).zfill(10)
    url = f'https://data.sec.gov/submissions/CIK{cik_padded}.json'
    # timeout prevents a hung connection from stalling a bulk run forever
    response = requests.get(url, headers=HEADERS, timeout=30)
    response.raise_for_status()
    return response.json()

Step 2: Find the 10-K Filing

def find_10k_filings(submissions, count=5, form_type='10-K'):
    """Find the most recent filings of a given form type from submissions data.

    Args:
        submissions: JSON dict as returned by get_submissions().
        count: Maximum number of filings to return.
        form_type: Exact EDGAR form code to match (default '10-K').
            Pass e.g. '10-Q' or '8-K' to reuse this for other filing types.

    Returns:
        List of dicts with 'accession', 'date', 'primary_doc', and
        'description' keys, in the order EDGAR lists them (newest first).
    """
    recent = submissions['filings']['recent']
    filings = []

    # The 'recent' payload is column-oriented: parallel lists indexed together.
    for i, form in enumerate(recent['form']):
        if form == form_type:
            filings.append({
                'accession': recent['accessionNumber'][i],
                'date': recent['filingDate'][i],
                'primary_doc': recent['primaryDocument'][i],
                'description': recent['primaryDocDescription'][i],
            })
            if len(filings) >= count:
                break

    return filings

# Example: Find Apple's recent 10-K filings
# (320193 is Apple's CIK; see the CIK lookup guide linked above)
submissions = get_submissions(320193)
filings_10k = find_10k_filings(submissions)

# One line per filing: filing date, accession number, primary document name
for f in filings_10k:
    print(f"  {f['date']}: {f['accession']} - {f['primary_doc']}")

Step 3: Construct the Download URL

def build_filing_url(cik, accession, filename):
    """Construct the full URL to a filing document on EDGAR.

    Args:
        cik: Company CIK (no zero-padding needed in Archive URLs).
        accession: Dashed accession number, e.g. '0000320193-24-000123'.
        filename: Primary document filename, e.g. 'aapl-20240928.htm'.

    Returns:
        Full https URL under the EDGAR Archives directory.
    """
    # Archive directory names use the accession number with dashes stripped.
    accession_no_dashes = accession.replace('-', '')
    # BUG FIX: the path previously ended in a literal "(unknown)" placeholder;
    # it must interpolate the document filename.
    return (f'https://www.sec.gov/Archives/edgar/data/'
            f'{cik}/{accession_no_dashes}/{filename}')

# Build URL for Apple's latest 10-K
# filings_10k[0] is the most recent match found by find_10k_filings
filing = filings_10k[0]
url = build_filing_url(320193, filing['accession'], filing['primary_doc'])
print(f'Filing URL: {url}')

Step 4: Download and Save the Filing

def download_filing(cik, accession, filename, save_dir='filings'):
    """Download a filing document from EDGAR and save it to disk.

    Args:
        cik: Company CIK.
        accession: Dashed accession number identifying the filing.
        filename: Primary document filename within the filing directory.
        save_dir: Directory to save into (created if missing).

    Returns:
        Path of the saved file.

    Raises:
        requests.HTTPError: On a non-2xx response.
    """
    url = build_filing_url(cik, accession, filename)
    time.sleep(0.12)  # Rate limit: stay under SEC's 10 req/sec cap

    response = requests.get(url, headers=HEADERS, timeout=30)
    response.raise_for_status()

    # Create save directory if needed
    os.makedirs(save_dir, exist_ok=True)
    # BUG FIX: the name previously contained a literal "(unknown)" placeholder.
    # Prefix with the accession number so documents from different filings
    # that share a filename do not collide.
    save_path = os.path.join(save_dir, f'{accession}_{filename}')

    with open(save_path, 'w', encoding='utf-8') as f:
        f.write(response.text)

    print(f'  Saved: {save_path} ({len(response.text):,} chars)')
    return save_path

# Download Apple's latest 10-K
# Saves into the default 'filings' directory and returns the saved path
filing = filings_10k[0]
path = download_filing(320193, filing['accession'], filing['primary_doc'])

Complete Script: Download Multiple 10-K Filings

def download_company_10ks(ticker, cik, count=3):
    """Download the most recent 10-K filings for one company.

    Fetches the company's submissions index, locates up to `count` 10-K
    filings, and downloads each into filings/<ticker>/. Failures for
    individual filings are reported and skipped (best-effort).
    """
    print(f'\nFetching {ticker} (CIK {cik})...')
    recent_10ks = find_10k_filings(get_submissions(cik), count=count)

    if not recent_10ks:
        print(f'  No 10-K filings found for {ticker}')
        return []

    saved = []
    target_dir = f'filings/{ticker}'
    for item in recent_10ks:
        try:
            saved.append(download_filing(
                cik, item['accession'], item['primary_doc'],
                save_dir=target_dir,
            ))
        except Exception as exc:
            # Keep going: one bad filing shouldn't abort the whole batch
            print(f'  Error downloading {item["accession"]}: {exc}')

    return saved

# Download 10-Ks for several companies
# Ticker -> CIK mapping; CIKs can be found via the CIK lookup guide above
companies = {
    'AAPL': '320193',
    'MSFT': '789019',
    'TSLA': '1318605',
}

# Each company's filings are saved under filings/<TICKER>/
for ticker, cik in companies.items():
    download_company_10ks(ticker, cik, count=3)

JavaScript Implementation

// Identify yourself to the SEC — requests without a User-Agent are rejected.
const HEADERS = { 'User-Agent': 'FilingDownloader [email protected]' };

async function getSubmissions(cik) {
  // The submissions endpoint requires the CIK zero-padded to 10 digits.
  const cikPadded = `${cik}`.padStart(10, '0');
  const response = await fetch(
    `https://data.sec.gov/submissions/CIK${cikPadded}.json`,
    { headers: HEADERS }
  );
  if (!response.ok) throw new Error(`HTTP ${response.status}`);
  return response.json();
}

async function downloadLatest10K(cik) {
  // Fetch the company's submissions index, then locate the newest 10-K.
  const subs = await getSubmissions(cik);
  const recent = subs.filings.recent;

  // Find the first 10-K (the recent arrays are listed newest-first)
  const idx = recent.form.findIndex(f => f === '10-K');
  if (idx === -1) throw new Error('No 10-K found');

  const accession = recent.accessionNumber[idx];
  const filename = recent.primaryDocument[idx];
  // Archive directory names use the accession number without dashes
  const accNoDashes = accession.replace(/-/g, '');

  // BUG FIX: the URL previously ended in a literal "$(unknown)" placeholder;
  // it must interpolate the primary document filename.
  const filingUrl = `https://www.sec.gov/Archives/edgar/data/${cik}/${accNoDashes}/${filename}`;

  // Rate limit: stay under the SEC's 10 requests/second cap
  await new Promise(r => setTimeout(r, 120));

  const res = await fetch(filingUrl, { headers: HEADERS });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);

  return {
    url: filingUrl,
    date: recent.filingDate[idx],
    accession,
    html: await res.text()
  };
}

// Usage
// NOTE: top-level await requires an ES module context; otherwise wrap
// this in an async IIFE.
const filing = await downloadLatest10K('320193');
console.log(`Downloaded Apple 10-K from ${filing.date}`);
console.log(`  URL: ${filing.url}`);
console.log(`  Size: ${filing.html.length.toLocaleString()} chars`);

Extracting Text from HTML Filings

Most modern 10-K filings are HTML documents. To extract plain text for analysis:

from bs4 import BeautifulSoup

def extract_text(html_content):
    """Return the plain text of an HTML SEC filing.

    Strips <script> and <style> elements, flattens the remaining markup
    to text, and drops blank lines, returning one non-empty line per row.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # Remove non-content elements before text extraction
    for unwanted in soup(['script', 'style']):
        unwanted.decompose()

    raw = soup.get_text(separator='\n')

    # Strip each line and keep only the non-empty ones
    stripped = (line.strip() for line in raw.splitlines())
    return '\n'.join(line for line in stripped if line)

# Read a downloaded filing and extract text
# NOTE(review): download_filing saved this file as UTF-8, but open() here
# uses the platform default encoding — confirm on Windows.
with open('filings/AAPL/0000320193-24-000123_aapl-20240928.htm') as f:
    html = f.read()

text = extract_text(html)
print(f'Extracted {len(text):,} characters of text')
print(text[:500])  # Preview first 500 chars

Handling the EDGAR Filing Index

Each filing has an index page listing all associated documents (HTML, XBRL, exhibits). Access it by replacing the filename with index.json:

def get_filing_index(cik, accession):
    """Get the filing index (index.json) listing all documents in a filing.

    Args:
        cik: Company CIK.
        accession: Dashed accession number.

    Returns:
        Parsed index dict; documents are listed under ['directory']['item'].

    Raises:
        requests.HTTPError: On a non-2xx response.
    """
    acc_no_dashes = accession.replace('-', '')
    url = f'https://www.sec.gov/Archives/edgar/data/{cik}/{acc_no_dashes}/index.json'
    time.sleep(0.12)  # stay under the 10 req/sec limit
    response = requests.get(url, headers=HEADERS, timeout=30)
    # Fail loudly on 4xx/5xx instead of a confusing JSON decode error
    response.raise_for_status()
    return response.json()

# List all documents in a filing
# index.json's 'directory' -> 'item' array holds one entry per file
index = get_filing_index(320193, filings_10k[0]['accession'])
for item in index['directory']['item']:
    print(f"  {item['name']} ({item.get('type', 'unknown')})")

Rate Limit Handling

The SEC blocks IPs that exceed 10 requests per second. For bulk downloads, implement proper throttling:

import time

class RateLimiter:
    """Simple rate limiter that spaces out SEC API calls.

    Call wait() before each request; it sleeps just long enough to keep
    the call rate at or below max_per_second.
    """

    def __init__(self, max_per_second=8):
        # Minimum seconds that must elapse between consecutive calls
        self.min_interval = 1.0 / max_per_second
        # 0.0 means "never called", so the first wait() never sleeps
        self.last_call = 0.0

    def wait(self):
        """Block until at least min_interval has passed since the last call."""
        # time.monotonic() cannot jump backwards under NTP/clock changes,
        # unlike time.time(), so the interval math stays correct.
        elapsed = time.monotonic() - self.last_call
        if elapsed < self.min_interval:
            time.sleep(self.min_interval - elapsed)
        self.last_call = time.monotonic()

limiter = RateLimiter(max_per_second=8)  # Stay under the 10/sec limit

# Call wait() before every request so the whole loop is throttled
for ticker, cik in companies.items():
    limiter.wait()
    submissions = get_submissions(cik)
    # ... process filings

For more details on rate limits and caching strategies, see our SEC EDGAR API Rate Limits & Best Practices guide.

FAQ

How do I download a 10-K filing from SEC EDGAR?

Look up the company's CIK, fetch their submissions from data.sec.gov, find the 10-K accession number, then download the document from the EDGAR Archives directory.

What is an accession number?

An accession number uniquely identifies each SEC filing. Format: XXXXXXXXXX-YY-ZZZZZZ (filer CIK, year, sequence). You need it to construct the download URL.

Can I download SEC filings in bulk?

Yes, but respect the 10 req/sec rate limit. For very large downloads, the SEC provides bulk data files at sec.gov/files/.

What format are SEC 10-K filings in?

Most modern 10-K filings are HTML. Older filings may be plain text. Some include XBRL in XML format. The filing index lists all available formats.

How do I extract text from an HTML SEC filing?

Use Python's BeautifulSoup library to parse HTML and extract text. Install with pip install beautifulsoup4, then use soup.get_text() to strip HTML tags.

Related Guides