Download SEC Filings With Python
Find ready-to-execute Python examples to download SEC EDGAR filings and exhibits in their original format or as PDFs. The examples cover how to locate and download historical 10-K and 10-Q filings to local disk, export financial statements from these filings as Excel files, download XBRL data files attached to EDGAR filings, and retrieve various exhibits such as material contracts, press releases, and more.
- Quick Start
- Download SEC 10-K Filings of Russell 3000 Companies
- Export Financial Statements from 10-K and 10-Q Filings to Excel Files
- Download SEC Filings as PDF
- Download XBRL Data Files from SEC Filings
- Download Material Contracts (Exhibit 10) from 10-K Filings
- Download Press Releases with Financial Results in Exhibit 99 from 8-K Filings
Quick Start
This Python quick start example demonstrates how to download SEC EDGAR filings and exhibits using the Filing Download API with the `sec-api` package. The package includes a `RenderApi` class, which provides the `.get_filing(URL)` method to download the original EDGAR filing or exhibit from a provided URL. The example also demonstrates how to download SEC filings and exhibits as PDFs using the `.get_pdf(URL)` method of the `PdfGeneratorApi` class.
pip install sec-api
from sec_api import RenderApi
renderApi = RenderApi(api_key="YOUR_API_KEY")
# examples
url_8k_html = "https://www.sec.gov/Archives/edgar/data/1045810/000104581023000014/nvda-20230222.htm"
url_8k_txt = "https://www.sec.gov/Archives/edgar/data/1045810/000104581023000014/0001045810-23-000014.txt"
url_exhibit99 = "https://www.sec.gov/Archives/edgar/data/1045810/000104581023000014/q4fy23pr.htm"
url_xbrl_instance = "https://www.sec.gov/Archives/edgar/data/1045810/000104581023000014/nvda-20230222_htm.xml"
url_excel_file = "https://www.sec.gov/Archives/edgar/data/1045810/000104581023000014/Financial_Report.xlsx"
url_pdf_file = "https://www.sec.gov/Archives/edgar/data/1798925/999999999724004095/filename1.pdf"
url_image_file = "https://www.sec.gov/Archives/edgar/data/1424404/000106299324017776/form10kxz001.jpg"
filing_8k_html = renderApi.get_filing(url_8k_html)
filing_8k_txt = renderApi.get_filing(url_8k_txt)
exhibit99 = renderApi.get_filing(url_exhibit99)
xbrl_instance = renderApi.get_filing(url_xbrl_instance)
# use .get_file() and set return_binary=True
# to get non-text files such as images, PDFs, etc.
excel_file = renderApi.get_file(url_excel_file, return_binary=True)
pdf_file = renderApi.get_file(url_pdf_file, return_binary=True)
image_file = renderApi.get_file(url_image_file, return_binary=True)
from sec_api import PdfGeneratorApi
pdfGeneratorApi = PdfGeneratorApi("YOUR_API_KEY")
# Tesla's 2024 10-K filing URL
filing_10K_url = "https://www.sec.gov/Archives/edgar/data/1318605/000162828024002390/tsla-20231231.htm"
# Nvidia's 2024 proxy statement (DEF14A)
proxy_statement_url = "https://www.sec.gov/Archives/edgar/data/1045810/000104581024000104/nvda-20240514.htm"
# Form 4 disclosing Berkshire Hathaway's $86 million purchase of SIRI stock
filing_4_url = "https://www.sec.gov/Archives/edgar/data/315090/000095017024114414/xslF345X05/ownership.xml"
# Microsoft's Form 8-K filing disclosing a cybersecurity incident
filing_8K_url = "https://www.sec.gov/Archives/edgar/data/789019/000119312524011295/d708866d8k.htm"
# Exhibit 99 disclosing updates of financial results
exhibit_99_url = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1320695/000132069520000148/ths12-31x201910krecast.htm"
pdf_10K_filing = pdfGeneratorApi.get_pdf(filing_10K_url)
pdf_proxy_filing = pdfGeneratorApi.get_pdf(proxy_statement_url)
pdf_4_filing = pdfGeneratorApi.get_pdf(filing_4_url)
pdf_8K_filing = pdfGeneratorApi.get_pdf(filing_8K_url)
pdf_ex_99 = pdfGeneratorApi.get_pdf(exhibit_99_url)
Download SEC 10-K Filings of Russell 3000 Companies
This ready-to-execute Python example demonstrates how to download all SEC 10-K filings for Russell 3000 companies from 2014 to 2023. The example can be easily adapted to download filings for other companies or different form types.
Steps:
- Generate a list of tickers for all current Russell 3000 companies.
- Use the Query API to retrieve the URLs of historical 10-K filings for each ticker.
- Download all 10-K filings using the Filing Download API.
Create a List of Tickers for Russell 3000 Companies
The constituents of the Russell 3000 index can be found in the latest Form N-PORT filings for ETFs that mirror the index, or they can be directly downloaded as a CSV file from the corresponding ETF’s website.
For this example, the most up-to-date list of tickers is extracted from the CSV file available on the iShares Russell 3000 ETF website. The CSV file includes the ticker, name, asset class, sector, and other details for each holding, and is downloaded and saved locally as `russell-3000-constituents.csv`.
The iShares website provides both historical and current constituent data:
- Current constituents can be downloaded directly from the ETF's holdings page.
- Historical constituents for a specific date can be retrieved by appending a query parameter such as `asOfDate=20221230` to the download URL, as in the example below for 2022-12-30.
The CSV file with current constituents is used to generate the list of tickers for Russell 3000 companies, which will serve as input for the next step in the process.
import requests
url = "https://www.ishares.com/us/products/239714/ishares-russell-3000-etf/1467271812596.ajax?" + \
"fileType=csv&fileName=IWV_holdings&dataType=fund&asOfDate=20221230"
response = requests.get(url)
with open("russell-3000-constituents.csv", "wb") as f:
    f.write(response.content)
The CSV file begins with nine metadata lines and ends with a disclaimer, both of which need to be removed. The remaining rows contain the tickers, names, and other information of Russell 3000 companies. Using pandas, the CSV file can be read while skipping the first nine lines, and the last two lines can be removed by slicing the DataFrame.
first_15_lines = "\n".join(response.text.split("\n")[:15])
print("First 15 lines of the CSV file:\n")
print(first_15_lines)
First 15 lines of the CSV file:
iShares Russell 3000 ETF
Fund Holdings as of,"Dec 30, 2022"
Inception Date,"May 22, 2000"
Shares Outstanding,"49,100,000.00"
Stock,"-"
Bond,"-"
Cash,"-"
Other,"-"
Ticker,Name,Sector,Asset Class,Market Value,Weight (%),Notional Value,Shares,Price,Location,Exchange,Currency,FX Rate,Market Currency,Accrual Date
"AAPL","APPLE INC","Information Technology","Equity","559,365,151.11","5.16","559,365,151.11","4,305,127.00","129.93","United States","NASDAQ","USD","1.00","USD","-"
"MSFT","MICROSOFT CORP","Information Technology","Equity","513,917,712.42","4.74","513,917,712.42","2,142,931.00","239.82","United States","NASDAQ","USD","1.00","USD","-"
"AMZN","AMAZON COM INC","Consumer Discretionary","Equity","213,823,596.00","1.97","213,823,596.00","2,545,519.00","84.00","United States","NASDAQ","USD","1.00","USD","-"
"BRKB","BERKSHIRE HATHAWAY INC CLASS B","Financials","Equity","159,603,687.60","1.47","159,603,687.60","516,684.00","308.90","United States","New York Stock Exchange Inc.","USD","1.00","USD","-"
"GOOGL","ALPHABET INC CLASS A","Communication","Equity","151,996,026.75","1.40","151,996,026.75","1,722,725.00","88.23","United States","NASDAQ","USD","1.00","USD","-"
import pandas as pd
russell_3000 = pd.read_csv("russell-3000-constituents.csv", skiprows=9)
# remove last two rows
russell_3000 = russell_3000.iloc[:-2]
print("Number of all constituents:", len(russell_3000))
print("First five Russell 3000 constituents:")
russell_3000.head()
Number of all constituents: 2611
First five Russell 3000 constituents:
| | Ticker | Name | Sector | Asset Class | Market Value | Weight (%) | Notional Value | Shares | Price | Location | Exchange | Currency | FX Rate | Market Currency | Accrual Date |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AAPL | APPLE INC | Information Technology | Equity | 559,365,151.11 | 5.16 | 559,365,151.11 | 4,305,127.00 | 129.93 | United States | NASDAQ | USD | 1.0 | USD | - |
| 1 | MSFT | MICROSOFT CORP | Information Technology | Equity | 513,917,712.42 | 4.74 | 513,917,712.42 | 2,142,931.00 | 239.82 | United States | NASDAQ | USD | 1.0 | USD | - |
| 2 | AMZN | AMAZON COM INC | Consumer Discretionary | Equity | 213,823,596.00 | 1.97 | 213,823,596.00 | 2,545,519.00 | 84.00 | United States | NASDAQ | USD | 1.0 | USD | - |
| 3 | BRKB | BERKSHIRE HATHAWAY INC CLASS B | Financials | Equity | 159,603,687.60 | 1.47 | 159,603,687.60 | 516,684.00 | 308.90 | United States | New York Stock Exchange Inc. | USD | 1.0 | USD | - |
| 4 | GOOGL | ALPHABET INC CLASS A | Communication | Equity | 151,996,026.75 | 1.40 | 151,996,026.75 | 1,722,725.00 | 88.23 | United States | NASDAQ | USD | 1.0 | USD | - |
Create a List of URLs for 10-K Filings
This part demonstrates how to use the Query API to retrieve the URLs of historical Form 10-K filings for all current Russell 3000 constituents and save them to local disk. Given the 2,611 constituents, a maximum of 2,611 Form 10-K filings per year can be expected, though likely fewer, as some holdings represent money market funds, futures, or other non-equity securities.
The Query API can return up to 10,000 results per search query, utilizing pagination. Since the search spans from 2014 to 2023 (a 10-year period), approximately 25,000 10-K filings are expected, based on an average of 2,500 Russell 3000 constituents disclosing 10-K filings per year. The initial search query might look like this:
formType:"10-K" AND filedAt:[2014-01-01 TO 2023-12-31] AND ticker:(<all tickers of Russell 3000 constituents>)
However, since the API can only return up to 10,000 results per query, the search must be broken into smaller chunks to ensure that each query retrieves fewer than 10,000 filings. One approach is to create individual search queries per company ticker, as each company typically files one 10-K per year, totaling around 10 filings over the 10-year period. A more efficient strategy would be to search for multiple companies (e.g., 100 tickers) at a time, which would yield around 1,000 10-K filings per query over 10 years.
Here’s an example of how to split the search queries:
# Query 1: First 100 constituents of the Russell 3000. Use pagination to retrieve all 1,000 10-K filings.
formType:"10-K" AND filedAt:[2014-01-01 TO 2023-12-31] AND ticker:(<first 100 tickers of Russell 3000 constituents>)
# Query 2: Next 100 constituents.
formType:"10-K" AND filedAt:[2014-01-01 TO 2023-12-31] AND ticker:(<next 100 tickers of Russell 3000 constituents>)
# Continue splitting the search by 100 tickers at a time...
As a first step, a simple function is defined to split the list of 2,611 tickers into batches of 100. The function `create_batches` takes a list of tickers and a batch size as input and returns a list of batches, where each batch contains at most the specified number of tickers. For example, given the list `[A, B, C, D, E, F]` and a batch size of 3, the result is two batches: `[[A, B, C], [D, E, F]]`.
def create_batches(tickers=[], batch_size=100):
    return [list(tickers[i : i + batch_size]) for i in range(0, len(tickers), batch_size)]
ticker_batches = create_batches(russell_3000["Ticker"], batch_size=100)
# convert ticker_batches to dataframe with one column "Tickers"
# where each row contains a list (batch) of tickers
ticker_batches_df = pd.DataFrame({"Tickers": ticker_batches})
print("Number of batches:", len(ticker_batches_df))
print("First five ticker batches:")
ticker_batches_df.head()
Number of batches: 27
First five ticker batches:
| | Tickers |
|---|---|
| 0 | [AAPL, MSFT, AMZN, BRKB, GOOGL, UNH, GOOG, JNJ... |
| 1 | [SLB, REGN, VRTX, BDX, ZTS, TGT, APD, ITW, BSX... |
| 2 | [PH, PRU, MSCI, YUM, CHTR, ALL, ECL, KMI, LULU... |
| 3 | [VMC, AEE, ETR, WY, FE, FTV, EBAY, LEN, DTE, A... |
| 4 | [TXT, ETSY, MOS, HWM, WRB, AVY, SWKS, SYF, FIC... |
The next Python code snippet iterates over all batches, creating a search query for each batch to retrieve all 10-K filings for the respective tickers from 2014 to 2023. The filing URLs are then extracted from the Query API response and saved in a DataFrame. Since the Query API can handle up to 40 requests per second, the `pandarallel` package is used to parallelize requests and improve efficiency by performing multiple queries simultaneously.
`pandarallel` extends pandas DataFrame objects with the `.parallel_apply(func)` method, which applies a function in parallel across rows. The number of concurrent tasks is controlled by the `nb_workers` parameter, set via `pandarallel.initialize(nb_workers=4)`. Using `.parallel_apply`, the function that retrieves all 10-K filing URLs is applied to each batch of tickers, enabling parallel processing of multiple batches and significantly speeding up the retrieval of metadata.
pip install sec-api pandarallel ipywidgets
from pandarallel import pandarallel
from sec_api import QueryApi
pandarallel.initialize(nb_workers=4, progress_bar=True)
SEC_API_KEY = "YOUR_API_KEY"
queryApi = QueryApi(api_key=SEC_API_KEY)
INFO: Pandarallel will run on 4 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.
def get_10K_filing_urls(row=None):
    if row is None:
        return pd.DataFrame()
    ticker_batch = row["Tickers"]
    if len(ticker_batch) == 0:
        return pd.DataFrame()
    # create a query string to search for 10-K filings for the given tickers
    # "(ticker:AAPL OR ticker:MSFT OR ...)"
    ticker_query = " OR ".join([f'ticker:"{ticker}"' for ticker in ticker_batch])
    ticker_query = f"({ticker_query})"
    # search for 10-K filings filed between 2014-01-01 and 2023-12-31
    date_query = "filedAt:[2014-01-01 TO 2023-12-31]"
    # exclude 10-K/A and NT 10-K filings
    form_type_query = 'formType:"10-K" AND NOT formType:"10-K/A" AND NOT formType:"NT"'
    search_query = f"{ticker_query} AND {date_query} AND {form_type_query}"
    search_params = {
        "query": search_query,
        "from": 0,
        "size": 50,
        "sort": [{"filedAt": {"order": "desc"}}],
    }
    print(f"Fetching filings for {ticker_batch[:4]}...\n")
    filing_urls = []
    # paginate through all result pages until no filings are returned
    while True:
        search_results = queryApi.get_filings(search_params)
        filings = search_results["filings"]
        if len(filings) == 0:
            break
        # extract metadata for each filing
        # { "ticker": "...", "cik": "...", "filedAt": "...", "filingUrl": "..." }
        metadata = [
            {
                "ticker": f["ticker"],
                "cik": f["cik"],
                "filedAt": f["filedAt"],
                "accessionNo": f["accessionNo"],
                "filingUrl": f["linkToFilingDetails"],
            }
            for f in filings
        ]
        filing_urls.extend(metadata)
        search_params["from"] += search_params["size"]
    return pd.DataFrame(filing_urls)
# use the first two batches of tickers for a test run
metadata = ticker_batches_df[:2].parallel_apply(get_10K_filing_urls, axis=1)
# uncomment the line below to process all batches
# metadata = ticker_batches_df.parallel_apply(get_10K_filing_urls, axis=1)
# concatenate the metadata dataframes to get a single dataframe
metadata = pd.concat(metadata.tolist(), ignore_index=True)
Fetching filings for ['SLB', 'REGN', 'VRTX', 'BDX']...
Fetching filings for ['AAPL', 'MSFT', 'AMZN', 'BRKB']...
metadata.to_csv("russell-3000-10k-filing-urls.csv", index=False)
print("Filing metadata of 10-K filings disclosed between 2014 and 2023:")
metadata
Filing metadata of 10-K filings disclosed between 2014 and 2023:
| | ticker | cik | filedAt | accessionNo | filingUrl |
|---|---|---|---|---|---|
| 0 | AMAT | 6951 | 2023-12-15T16:01:24-05:00 | 0000006951-23-000041 | https://www.sec.gov/Archives/edgar/data/6951/0... |
| 1 | DE | 315189 | 2023-12-15T10:27:39-05:00 | 0001558370-23-019812 | https://www.sec.gov/Archives/edgar/data/315189... |
| 2 | AVGO | 1730168 | 2023-12-14T16:54:05-05:00 | 0001730168-23-000096 | https://www.sec.gov/Archives/edgar/data/173016... |
| 3 | DIS | 1744489 | 2023-11-21T17:04:04-05:00 | 0001744489-23-000216 | https://www.sec.gov/Archives/edgar/data/174448... |
| 4 | ADI | 6281 | 2023-11-21T16:24:08-05:00 | 0000006281-23-000203 | https://www.sec.gov/Archives/edgar/data/6281/0... |
| ... | ... | ... | ... | ... | ... |
| 1936 | GD | 40533 | 2014-02-07T09:05:33-05:00 | 0000040533-14-000002 | https://www.sec.gov/Archives/edgar/data/40533/... |
| 1937 | BIIB | 875045 | 2014-02-06T17:28:12-05:00 | 0000875045-14-000004 | https://www.sec.gov/Archives/edgar/data/875045... |
| 1938 | GM | 1467858 | 2014-02-06T12:49:42-05:00 | 0001467858-14-000043 | https://www.sec.gov/Archives/edgar/data/146785... |
| 1939 | CMG | 1058090 | 2014-02-04T20:22:21-05:00 | 0001193125-14-035451 | https://www.sec.gov/Archives/edgar/data/105809... |
| 1940 | SLB | 87347 | 2014-01-31T08:41:08-05:00 | 0001564590-14-000090 | https://www.sec.gov/Archives/edgar/data/87347/... |
1941 rows × 5 columns
Download SEC 10-K Filings to Local Disk
After aggregating the URLs of all 10-K filings between 2014 and 2023 for companies in the Russell 3000 index, the `.get_filing(file_url)` method of the `RenderApi` class in the `sec-api` package is used to download the filings in their original HTML or text format. This method accepts the URL of the filing or any other EDGAR file (such as an exhibit) and returns the original content.
For each company, the 10-K filings are downloaded and saved in a structured directory on the local disk, organized as `filings/{ticker}/{filing_date}_{accession_number}.html`, where the filing content is stored as `.html` or `.txt` files. The filename includes the filing date and accession number for easy reference. An example of this structure is:
filings/
AAPL/
2022-10-31_0000320193-22-000002.html
2021-10-29_0000320193-21-000096.html
...
MSFT/
2022-07-29_0001564590-22-031000.html
2021-07-30_0001564590-21-034056.html
...
...
The `.get_filing(file_url)` method sends requests to the Filing Download API and supports up to 50 requests per second. To utilize this limit efficiently, the `pandarallel` package is used again to parallelize the downloading process. A new function `download_filing(row)` is defined to handle downloading and saving the filing content for each row in the `metadata` DataFrame. By applying this function to the DataFrame with `metadata.parallel_apply(download_filing, axis=1)`, multiple filings are downloaded simultaneously, significantly speeding up the process.
import os
from sec_api import RenderApi
renderApi = RenderApi(SEC_API_KEY)
def download_filing(row):
    ticker = row["ticker"]
    accessionNo = row["accessionNo"]
    filedAt = row["filedAt"].split("T")[0]
    filing_url = row["filingUrl"]
    try:
        content = renderApi.get_filing(filing_url)
        # create the "filings/{ticker}/" folder if it doesn't exist yet
        os.makedirs(f"filings/{ticker}/", exist_ok=True)
        # derive the file extension (htm, html or txt) from the filing URL
        file_type = filing_url.split("/")[-1].split(".")[-1]
        local_file_name = f"filings/{ticker}/{filedAt}_{accessionNo}.{file_type}"
        with open(local_file_name, "w") as f:
            f.write(content)
        print(f"✅ Downloaded {local_file_name}")
    except Exception:
        print(f"❌ {ticker}: download failed for {filing_url}")
# perform test download of first 10 filings
downloaded = metadata[:10].parallel_apply(download_filing, axis=1)
# uncomment the line below to download all filings
# downloaded = metadata.parallel_apply(download_filing, axis=1)
print(f"Completed downloading {len(downloaded)} filings")
✅ Downloaded filings/AMAT/2023-12-15_0000006951-23-000041.htm
✅ Downloaded filings/DIS/2023-11-21_0001744489-23-000216.htm
✅ Downloaded filings/DE/2023-12-15_0001558370-23-019812.htm
✅ Downloaded filings/ADI/2023-11-21_0000006281-23-000203.htm
✅ Downloaded filings/V/2023-11-15_0001403161-23-000099.htm
✅ Downloaded filings/QCOM/2023-11-01_0000804328-23-000055.htm
✅ Downloaded filings/AVGO/2023-12-14_0001730168-23-000096.htm
✅ Downloaded filings/SBUX/2023-11-17_0000829224-23-000058.htm
✅ Downloaded filings/AAPL/2023-11-02_0000320193-23-000106.htm
✅ Downloaded filings/ACN/2023-10-12_0001467373-23-000324.htm
Completed downloading 10 filings
Using a simple `for` loop to iterate over each filing's metadata would download all filings sequentially: each request must finish before the next one starts, and most of that time is spent waiting for the network. This approach would take several hours to complete. By spreading the download tasks across four independent workers with `pandarallel`, the total download time is reduced by approximately 75%, to a quarter of the sequential time. With five workers, the time would be reduced to a fifth.
Consider the following example:
- Average time to download a single filing: 300 ms
- Number of filings to download: 10 years × 2,500 filings per year = 25,000 filings
Time calculation for downloading sequentially (1 filing at a time):
- Total time = (25,000 filings × 300 ms) / 1,000 ms per second / 60 seconds per minute = 125 minutes (about 2 hours)
Time calculation for downloading with four workers (4 filings in parallel):
- Total time = (25,000 filings × 300 ms) / 1,000 / 60 / 4 workers ≈ 31 minutes (about half an hour)
Downloading 4 filings in parallel instead of 1 at a time therefore significantly reduces the overall download time and makes much better use of the API's request limit.
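The arithmetic above can be reproduced with a short, self-contained sketch. The 300 ms average download time and the 25,000-filing count are the assumptions stated in the example, not measured values:

```python
# Back-of-the-envelope estimate: total download time in minutes
# for a given number of parallel workers.
AVG_MS_PER_FILING = 300  # assumed average time to download one filing
N_FILINGS = 10 * 2500    # 10 years x ~2,500 filings per year

def total_minutes(n_workers):
    # total work in ms, converted to minutes, divided across workers
    return N_FILINGS * AVG_MS_PER_FILING / 1000 / 60 / n_workers

print(f"Sequential (1 worker): {total_minutes(1):.0f} minutes")
print(f"Parallel (4 workers):  {total_minutes(4):.2f} minutes")
```

This also makes it easy to check how the estimate scales with other worker counts before picking `nb_workers`.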
Export Financial Statements from 10-K and 10-Q Filings to Excel Files
SEC Form 10-K and 10-Q filings often include an attached Excel file named `Financial_Report.xlsx`, which contains key financial statements across multiple sheets, such as:
- Document and entity information
- Balance sheets
- Income statements (statements of operations)
- Statement of shareholders' equity
- Statements of cash flows
- Notes to financial statements
For example, Amazon's financial statements for Q2 2024, included in its 10-Q filing, can be found in this Excel file.
To access the Excel file from any 10-K or 10-Q filing, the URL can be derived from the original filing URL as follows:
Original URL:
https://www.sec.gov/Archives/edgar/data/1018724/000101872424000130/amzn-20240630.htm
                                        ^-----^ ^----------------^ ^---------------^
                                          CIK     Accession No.     Filing Filename
Replace the filing filename with `Financial_Report.xlsx`:
https://www.sec.gov/Archives/edgar/data/1018724/000101872424000130/Financial_Report.xlsx
                                        ^-----^ ^----------------^ ^-------------------^
                                          CIK     Accession No.       Excel Filename
The Excel file can be downloaded using the `.get_file(file_url, return_binary=True)` method of the `RenderApi` class. When downloading non-text, binary files such as Excel files, PDFs, or images, the `return_binary=True` flag must be set to ensure the file is retrieved in binary rather than text format.
pip install sec-api
import re
from sec_api import RenderApi
renderApi = RenderApi("YOUR_API_KEY")
# Form 10-Q filing URL for Amazon (AMZN) for the period Q2 2024
filing_url = "https://www.sec.gov/Archives/edgar/data/1018724/000101872424000130/amzn-20240630.htm"
# replace the filing file name with `Financial_Report.xlsx`
excel_file_url = re.sub(r"/[^\/]+\.htm", "/Financial_Report.xlsx", filing_url)
print("10-Q Filing URL:", filing_url)
print("Excel file URL:\t", excel_file_url)
10-Q Filing URL: https://www.sec.gov/Archives/edgar/data/1018724/000101872424000130/amzn-20240630.htm
Excel file URL: https://www.sec.gov/Archives/edgar/data/1018724/000101872424000130/Financial_Report.xlsx
excel_file = renderApi.get_file(excel_file_url, return_binary=True)
with open("Financial_Report.xlsx", "wb") as file:
    file.write(excel_file)
print("Excel file saved to Financial_Report.xlsx")
Excel file saved to Financial_Report.xlsx
Download SEC Filings as PDF
Most SEC EDGAR filings are published in HTML format, while older filings were submitted as plain text (`.txt`). Certain form types, such as Form 4 (insider trading reports), Form 13F (institutional investment manager reports), and Form N-PORT (investment company portfolio reports), are filed in XML format only. In rare instances, some filings, such as SEC staff actions (`ORDER`), are published directly as PDFs.
Since most SEC filings are not published in PDF format, converting the original content is necessary to download them as PDFs. The PDF Generator API offers this functionality by converting HTML, XML, or text-based filings and exhibits into PDFs while preserving the original formatting, including images, fonts, and tables. The API supports downloading any EDGAR filing or attached exhibit as a PDF, such as Form 10-K, 10-Q, 8-K, DEF 14A, Exhibit 99, and more.
In Python, a filing or exhibit can be downloaded as a PDF file with the `.get_pdf(file_url)` method of the `PdfGeneratorApi` class from the `sec-api` package. This method accepts the URL of the filing or exhibit and returns the converted PDF, preserving the original content along with all images, tables, and formatting. Images, such as voting proposals in proxy statements, are optimized for printing, and invisible inline XBRL tags are removed to reduce the PDF file size and prevent bloating.
Example Use Cases:
- Downloading a Form 10-K filing as a PDF
- Downloading a proxy statement (DEF 14A) as a PDF
- Downloading a Form 4 filing as a PDF
- Downloading a Form 8-K filing as a PDF
- Downloading an Exhibit 99 file from a Form 8-K filing as a PDF
pip install sec-api
from sec_api import PdfGeneratorApi
pdfGeneratorApi = PdfGeneratorApi("YOUR_API_KEY")
# Tesla's 2024 10-K filing URL
filing_10K_url = "https://www.sec.gov/Archives/edgar/data/1318605/000162828024002390/tsla-20231231.htm"
# Nvidia's 2024 proxy statement (DEF14A)
proxy_statement_url = "https://www.sec.gov/Archives/edgar/data/1045810/000104581024000104/nvda-20240514.htm"
# Form 4 disclosing Berkshire Hathaway's $86 million purchase of SIRI stock
filing_4_url = "https://www.sec.gov/Archives/edgar/data/315090/000095017024114414/xslF345X05/ownership.xml"
# Microsoft's Form 8-K filing disclosing a cybersecurity incident
filing_8K_url = "https://www.sec.gov/Archives/edgar/data/789019/000119312524011295/d708866d8k.htm"
# Exhibit 99 disclosing updates of financial results
exhibit_99_url = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1320695/000132069520000148/ths12-31x201910krecast.htm"
# convert all EDGAR filings and exhibits to PDF
pdf_10K_filing = pdfGeneratorApi.get_pdf(filing_10K_url)
pdf_proxy_filing = pdfGeneratorApi.get_pdf(proxy_statement_url)
pdf_4_filing = pdfGeneratorApi.get_pdf(filing_4_url)
pdf_8K_filing = pdfGeneratorApi.get_pdf(filing_8K_url)
pdf_ex_99 = pdfGeneratorApi.get_pdf(exhibit_99_url)
# save the PDF files to disk
with open("tesla_10K.pdf", "wb") as file:
    file.write(pdf_10K_filing)
with open("nvidia_proxy_statement.pdf", "wb") as file:
    file.write(pdf_proxy_filing)
with open("berkshire_form_4.pdf", "wb") as file:
    file.write(pdf_4_filing)
with open("microsoft_8K.pdf", "wb") as file:
    file.write(pdf_8K_filing)
with open("exhibit_99.pdf", "wb") as file:
    file.write(pdf_ex_99)
Download XBRL Data Files from SEC Filings
This example demonstrates how to find and download original XBRL data files attached to SEC filings, such as annual reports on Form 10-K.
Note: XBRL data is also accessible in an aggregated and structured JSON format via the XBRL-to-JSON API.
XBRL (eXtensible Business Reporting Language) files are XML-based documents that provide structured data for SEC EDGAR filings. They contain financial information like income statements, balance sheets, entity details (e.g., address, ticker symbol, auditor), and text blocks such as notes to financial statements. Many filings, such as annual and quarterly reports (Form 10-K, Form 10-Q), prospectuses (Form 424Bx) and registration statements (Form S-1, etc.), include an XBRL schema file that defines the structure of the filing data, along with other files that hold the actual structured data in XBRL format.
The metadata for these XBRL files, including their URLs and types, is found in the `dataFiles` array within the filing object returned by the Query API. Key fields such as `documentUrl`, `type`, and `description` provide details about each XBRL file, including its URL on EDGAR, the file type (e.g., `EX-101.INS`), and a description such as `XBRL INSTANCE DOCUMENT`.
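As an illustration of how these fields can be used, the sketch below filters a filing's `dataFiles` array by file type to collect the download URLs. The `filing` dictionary is a hypothetical, abbreviated stand-in for a filing object returned by the Query API, and its `documentUrl` values are placeholders:

```python
# Hypothetical, abbreviated filing object; only the "dataFiles" field
# is shown, with placeholder documentUrl values.
filing = {
    "dataFiles": [
        {"type": "EX-101.SCH", "description": "XBRL SCHEMA FILE",
         "documentUrl": "https://www.sec.gov/Archives/edgar/data/0/0/example.xsd"},
        {"type": "EX-101.INS", "description": "XBRL INSTANCE DOCUMENT",
         "documentUrl": "https://www.sec.gov/Archives/edgar/data/0/0/example.xml"},
    ]
}

def data_file_urls(filing, file_type):
    # collect the documentUrl of every attached XBRL file of the given type;
    # filings without XBRL attachments simply yield an empty list
    return [
        f["documentUrl"]
        for f in filing.get("dataFiles", [])
        if f["type"] == file_type
    ]

print(data_file_urls(filing, "EX-101.INS"))
```

The same filter works for any of the XBRL file types (e.g., `EX-101.SCH`, `EX-101.CAL`, `EX-101.DEF`, `EX-101.LAB`) shown in the metadata output below.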
This example shows how to retrieve and download XBRL files attached to Form 10-Q filings from 2020 to 2023. The steps include:
- Find and aggregate all URLs of the XBRL files attached to Form 10-Q filings.
- Download all XBRL XML files retrieved in step 1 using the Filing Download API, and save them locally.
The example can be easily adapted to search for any filing form type, filer, or date range. For instance, it can be modified to locate XBRL files from 424B2 prospectuses.
pip install sec-api
SEC_API_KEY = "YOUR_API_KEY"
Finding URLs of XBRL Files
To locate all EDGAR 10-Q filings that include XBRL data, a match-any search query such as `dataFiles:*` can be used. This query identifies any filing that contains a non-empty `dataFiles` array, as the array exclusively holds XBRL data. To narrow down the search further, the form type filter `formType:"10-Q" AND NOT formType:"10-Q/A"` is added, ensuring only 10-Q filings are retrieved, excluding amended versions. A date range filter is also applied to limit the search to quarterly reports with XBRL data published within a specific time frame.
The final query looks like this:
dataFiles:* AND formType:"10-Q" AND NOT formType:"10-Q/A" AND filedAt:[2020-01-01 TO 2023-12-31]
Since the Query API can return a maximum of 10,000 filings per search query, and the number of 10-Q filings matching the above criteria exceeds 10,000, the query needs to be broken into smaller subsets. One approach is to construct search queries by month, iterating through the result pages for each month from 2020 to 2023. With a maximum of approximately 5,000 10-Q filings per month, the 10,000 result limit is never exceeded, allowing all filings to be fetched month by month, year over year.
import pandas as pd
from sec_api import QueryApi
queryApi = QueryApi(SEC_API_KEY)
filings = []
base_query = 'dataFiles:* AND formType:"10-Q" AND NOT formType:"10-Q/A"'
start_year = 2020
end_year = 2023
for year in range(start_year, end_year + 1):
    print(f"Starting to download metadata of filings from {year}")
    for month in range(1, 13):
        print(f"-- Starting month {month}")
        date_range_query = f"filedAt:[{year}-{month:02d}-01 TO {year}-{month:02d}-31]"
        query = f"{base_query} AND {date_range_query}"
        search_parameters = {
            "query": query,
            "from": 0,
            "size": 50,
            "sort": [{"filedAt": {"order": "desc"}}],
        }
        # paginate through all result pages of the month
        while True:
            response = queryApi.get_filings(search_parameters)
            if len(response["filings"]) == 0:
                break
            filings.append(response["filings"])
            search_parameters["from"] += 50
            # comment out the following break to fetch all result pages
            break
filings = [item for sublist in filings for item in sublist]
filings = pd.DataFrame(filings)
Starting to download metadata of filings from 2022
-- Starting month 1
-- Starting month 2
-- Starting month 3
-- Starting month 4
-- Starting month 5
-- Starting month 6
-- Starting month 7
-- Starting month 8
-- Starting month 9
-- Starting month 10
-- Starting month 11
-- Starting month 12
Starting to download metadata of filings from 2023
-- Starting month 1
-- Starting month 2
-- Starting month 3
-- Starting month 4
-- Starting month 5
-- Starting month 6
-- Starting month 7
-- Starting month 8
-- Starting month 9
-- Starting month 10
-- Starting month 11
-- Starting month 12
print(f"Total filings fetched: {len(filings)}")
print("10-Q filing metadata including XBRL data files:")
filings[["ticker", "cik", "formType", "accessionNo", "filedAt", "dataFiles"]].head(10)
Total filings fetched: 1200
10-Q filing metadata including XBRL data files:
| | ticker | cik | formType | accessionNo | filedAt | dataFiles |
|---|---|---|---|---|---|---|
| 0 | EVOA | 728447 | 10-Q | 0000950170-22-000601 | 2022-01-31T19:24:32-05:00 | [{'sequence': '6', 'size': '125600', 'document... |
| 1 | EVOA | 728447 | 10-Q | 0000950170-22-000600 | 2022-01-31T19:22:42-05:00 | [{'sequence': '6', 'size': '82410', 'documentU... |
| 2 | EVOA | 728447 | 10-Q | 0000950170-22-000599 | 2022-01-31T19:20:34-05:00 | [{'sequence': '6', 'size': '862403', 'document... |
| 3 | TVC | 1376986 | 10-Q | 0001376986-22-000005 | 2022-01-31T17:36:44-05:00 | [{'sequence': '6', 'size': '120761', 'document... |
| 4 | HP | 46765 | 10-Q | 0000046765-22-000006 | 2022-01-31T17:23:47-05:00 | [{'sequence': '5', 'size': '59546', 'documentU... |
| 5 | LUB | 16099 | 10-Q | 0000016099-22-000006 | 2022-01-31T16:52:26-05:00 | [{'sequence': '7', 'size': '50542', 'documentU... |
| 6 | DLHC | 785557 | 10-Q | 0000785557-22-000003 | 2022-01-31T16:32:17-05:00 | [{'sequence': '5', 'size': '38022', 'documentU... |
| 7 | CRUS | 772406 | 10-Q | 0000772406-22-000006 | 2022-01-31T16:01:19-05:00 | [{'sequence': '6', 'size': '36119', 'documentU... |
| 8 | MNRO | 876427 | 10-Q | 0000876427-22-000003 | 2022-01-31T15:51:34-05:00 | [{'sequence': '6', 'size': '34194', 'documentU... |
| 9 | ADP | 8670 | 10-Q | 0000008670-22-000014 | 2022-01-31T15:19:08-05:00 | [{'sequence': '8', 'size': '51863', 'documentU... |
import json
print("Metadata of XBRL files from the first filing:")
print(json.dumps(response["filings"][0]["dataFiles"], indent=2))
Metadata of XBRL files from the first filing:
[
{
"sequence": "6",
"size": "21999",
"documentUrl": "https://www.sec.gov/Archives/edgar/data/1884072/000119983523000643/jewl-20230630.xsd",
"description": "XBRL SCHEMA FILE",
"type": "EX-101.SCH"
},
{
"sequence": "7",
"size": "36299",
"documentUrl": "https://www.sec.gov/Archives/edgar/data/1884072/000119983523000643/jewl-20230630_cal.xml",
"description": "XBRL CALCULATION FILE",
"type": "EX-101.CAL"
},
{
"sequence": "8",
"size": "68475",
"documentUrl": "https://www.sec.gov/Archives/edgar/data/1884072/000119983523000643/jewl-20230630_def.xml",
"description": "XBRL DEFINITION FILE",
"type": "EX-101.DEF"
},
{
"sequence": "9",
"size": "197267",
"documentUrl": "https://www.sec.gov/Archives/edgar/data/1884072/000119983523000643/jewl-20230630_lab.xml",
"description": "XBRL LABEL FILE",
"type": "EX-101.LAB"
},
{
"sequence": "10",
"size": "157708",
"documentUrl": "https://www.sec.gov/Archives/edgar/data/1884072/000119983523000643/jewl-20230630_pre.xml",
"description": "XBRL PRESENTATION FILE",
"type": "EX-101.PRE"
},
{
"sequence": "42",
"size": "298216",
"documentUrl": "https://www.sec.gov/Archives/edgar/data/1884072/000119983523000643/jewl-10q_htm.xml",
"description": "EXTRACTED XBRL INSTANCE DOCUMENT",
"type": "XML"
}
]
xbrl_files = filings.explode("dataFiles")[
    ["ticker", "cik", "formType", "accessionNo", "filedAt", "dataFiles"]
]

columns_to_add = ["type", "description", "documentUrl"]
for col in columns_to_add:
    xbrl_files[col] = xbrl_files["dataFiles"].apply(
        lambda x: x[col] if col in x else None
    )
xbrl_files = xbrl_files.drop(columns=["dataFiles"])

# save to CSV file
xbrl_files.to_csv("xbrl_files.csv", index=False)

print("XBRL data files:")
xbrl_files
XBRL data files:
  | ticker | cik | formType | accessionNo | filedAt | type | description | documentUrl
---|---|---|---|---|---|---|---|---
0 | EVOA | 728447 | 10-Q | 0000950170-22-000601 | 2022-01-31T19:24:32-05:00 | EX-101.SCH | XBRL TAXONOMY EXTENSION SCHEMA DOCUMENT | https://www.sec.gov/Archives/edgar/data/728447... |
0 | EVOA | 728447 | 10-Q | 0000950170-22-000601 | 2022-01-31T19:24:32-05:00 | EX-101.PRE | XBRL TAXONOMY EXTENSION PRESENTATION LINKBASE ... | https://www.sec.gov/Archives/edgar/data/728447... |
0 | EVOA | 728447 | 10-Q | 0000950170-22-000601 | 2022-01-31T19:24:32-05:00 | EX-101.LAB | XBRL TAXONOMY EXTENSION LABEL LINKBASE DOCUMENT | https://www.sec.gov/Archives/edgar/data/728447... |
0 | EVOA | 728447 | 10-Q | 0000950170-22-000601 | 2022-01-31T19:24:32-05:00 | EX-101.CAL | XBRL TAXONOMY EXTENSION CALCULATION LINKBASE D... | https://www.sec.gov/Archives/edgar/data/728447... |
0 | EVOA | 728447 | 10-Q | 0000950170-22-000601 | 2022-01-31T19:24:32-05:00 | EX-101.DEF | XBRL TAXONOMY EXTENSION DEFINITION LINKBASE DO... | https://www.sec.gov/Archives/edgar/data/728447... |
... | ... | ... | ... | ... | ... | ... | ... | ... |
1199 | AITR | 1966734 | 10-Q | 0001493152-23-045528 | 2023-12-20T11:13:54-05:00 | EX-101.CAL | XBRL CALCULATION FILE | https://www.sec.gov/Archives/edgar/data/196673... |
1199 | AITR | 1966734 | 10-Q | 0001493152-23-045528 | 2023-12-20T11:13:54-05:00 | EX-101.DEF | XBRL DEFINITION FILE | https://www.sec.gov/Archives/edgar/data/196673... |
1199 | AITR | 1966734 | 10-Q | 0001493152-23-045528 | 2023-12-20T11:13:54-05:00 | EX-101.LAB | XBRL LABEL FILE | https://www.sec.gov/Archives/edgar/data/196673... |
1199 | AITR | 1966734 | 10-Q | 0001493152-23-045528 | 2023-12-20T11:13:54-05:00 | EX-101.PRE | XBRL PRESENTATION FILE | https://www.sec.gov/Archives/edgar/data/196673... |
1199 | AITR | 1966734 | 10-Q | 0001493152-23-045528 | 2023-12-20T11:13:54-05:00 | XML | EXTRACTED XBRL INSTANCE DOCUMENT | https://www.sec.gov/Archives/edgar/data/196673... |
7196 rows × 8 columns
Download XBRL Files from SEC Filings
In the final step, the XBRL files from SEC filings are downloaded and organized into the following folder structure: `xbrl-files/<cik>/<accessionNo>/<edgar_file_type>.<file_extension>`, where `<file_extension>` is the extension of the source file (e.g., `xml` or `xsd`). An example folder structure is shown below:
xbrl-files/
320193/
0000320193-21-000139/
EX-101.SCH.xsd
EX-101.CAL.xml
EX-101.DEF.xml
EX-101.LAB.xml
EX-101.PRE.xml
XML.xml
.../
The following table provides an overview of the XBRL file types and their descriptions:
EDGAR File Type | File Extension | Description |
---|---|---|
EX-101.SCH | *.xsd | XBRL Taxonomy Schema |
EX-101.CAL | *.xml | XBRL Calculation Linkbase |
EX-101.DEF | *.xml | XBRL Definition Linkbase |
EX-101.LAB | *.xml | XBRL Label Linkbase |
EX-101.PRE | *.xml | XBRL Presentation Linkbase |
XML | *.xml | XBRL Instance Document |
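Combining the folder convention above with this table, the local path of each data file can be derived from the EDGAR file type and the extension of its `documentUrl`. The `local_xbrl_path` helper below is a hypothetical sketch of that mapping, not part of the `sec-api` package:

```python
from pathlib import Path

def local_xbrl_path(cik, accession_no, edgar_file_type, document_url):
    # the extension comes from the source URL: .xsd for the schema file,
    # .xml for the linkbases and the extracted instance document
    extension = document_url.rsplit(".", 1)[-1]
    return Path("xbrl-files") / str(cik) / accession_no / f"{edgar_file_type}.{extension}"

schema_url = "https://www.sec.gov/Archives/edgar/data/1884072/000119983523000643/jewl-20230630.xsd"
print(local_xbrl_path(1884072, "0001199835-23-000643", "EX-101.SCH", schema_url))
# xbrl-files/1884072/0001199835-23-000643/EX-101.SCH.xsd
```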
To efficiently download multiple XBRL files at once, the `pandarallel` package is used to parallelize the download process across multiple workers, significantly speeding up the retrieval process.
pip install pandarallel ipywidgets
import os
from pandarallel import pandarallel
from sec_api import RenderApi
pandarallel.initialize(nb_workers=4, progress_bar=True)
renderApi = RenderApi(SEC_API_KEY)
INFO: Pandarallel will run on 4 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.
def download_and_save_xbrl_file(row):
    cik = row["cik"]
    accessionNo = row["accessionNo"]
    file_url = row["documentUrl"]
    file_type = row["type"]
    file_extension = file_url.split(".")[-1]
    try:
        xbrl_data = renderApi.get_file(file_url)
        xbrl_file_name = f"{file_type}.{file_extension}"
        folder_path = f"xbrl-files/{cik}/{accessionNo}"
        file_path = f"{folder_path}/{xbrl_file_name}"
        # exist_ok avoids a race condition when multiple workers
        # create the same folder concurrently
        os.makedirs(folder_path, exist_ok=True)
        with open(file_path, "w") as f:
            f.write(xbrl_data)
    except Exception as e:
        print(f"Failed to download {file_url} for {cik} - {accessionNo}: {e}\n")
        return None

# download and save the first 50 XBRL files
results = xbrl_files[:50].parallel_apply(download_and_save_xbrl_file, axis=1)
# uncomment the line below to download all XBRL files
# results = xbrl_files.parallel_apply(download_and_save_xbrl_file, axis=1)
print(f"Downloaded {len(results)} XBRL files")
Downloaded 50 XBRL files
Download Material Contracts (Exhibit 10) from SEC 10-K Filings
This example demonstrates how to locate material contracts disclosed in Exhibit 10 of SEC filings—such as Forms 10-K, 10-Q, or S-1—and download them in their original HTML format and as PDFs to local disk.
For this example, the focus is on Exhibit 10 contracts disclosed in Form 10-K filings from 2020 to 2023. The steps are as follows:
- Use the Query API to find URLs for Exhibit 10 files within 10-K filings.
- Download the material contracts as both HTML and PDF files using the Filing Download and PDF Generator APIs.
pip install sec-api
SEC_API_KEY = "YOUR_API_KEY"
Find and Aggregate URLs of Exhibit 10 Files from 10-K Filings
The Query API is used to locate and compile URLs for material contracts disclosed in Exhibit 10 from 10-K filings submitted between 2020 and 2023. The following search query filters the desired filings:
formType:"10-K" AND documentFormatFiles.type:"EX-10" AND filedAt:[2020-01-01 TO 2023-12-31]
This search retrieves metadata for all Form 10-K filings containing `EX-10` (Exhibit 10) documents filed within the specified date range (January 2020 to December 2023). The `documentFormatFiles` array within each filing's metadata includes detailed information about each attached document, such as its URL (`documentUrl`), type, description, and size. An example structure of the `documentFormatFiles` array is shown below:
"documentFormatFiles": [
{
"sequence": "3",
"size": "50752",
"documentUrl": "https://www.sec.gov/Archives/edgar/data/72331/000007233123000242/ex10-unordsonxformofstocko.htm",
"description": "EX-10.U",
"type": "EX-10.U"
}
// ... additional documents
],
// ... other filing metadata
Examples of Exhibit 10 Files
Exhibit 10 material contracts encompass a variety of agreement types, including but not limited to:
- Compensation plans for non-employee directors
- Stock award notices
- Board resolutions (e.g., issuance of convertible notes)
- Investment management trust agreements
- Credit card program agreements
- License agreements
- Credit agreements
- Note purchase agreements
- Share exchange agreements
Although the Query API locates filings containing Exhibit 10, it cannot filter specific types of material contracts within Exhibit 10. To identify specific contract types, additional filtering can be performed client-side by downloading the HTML content and searching for specific keywords or phrases, such as "license agreement," within the exhibit text.
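The client-side filtering mentioned above can be as simple as stripping the HTML tags from a downloaded exhibit and searching the remaining text for a phrase. The `mentions_keyword` helper below is a hypothetical sketch; in practice, the HTML content would come from downloading an `exhibit10Url` with the Filing Download API:

```python
import re

def mentions_keyword(html: str, keyword: str) -> bool:
    # remove tags, collapse whitespace, then search case-insensitively
    text = re.sub(r"<[^>]+>", " ", html)
    text = re.sub(r"\s+", " ", text)
    return keyword.lower() in text.lower()

sample = '<p>This <b>License</b> Agreement (the "Agreement") is made ...</p>'
print(mentions_keyword(sample, "license agreement"))  # True
print(mentions_keyword(sample, "credit agreement"))   # False
```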
import pandas as pd
from sec_api import QueryApi

queryApi = QueryApi(api_key=SEC_API_KEY)

query = 'formType:"10-K" AND documentFormatFiles.type:"EX-10" AND filedAt:[2020-01-01 TO 2023-12-31]'

search_params = {
    "query": query,
    "from": 0,
    "size": 50,
    "sort": [{"filedAt": {"order": "desc"}}],
}

response = queryApi.get_filings(search_params)

print("Number of 10-K filings with Exhibit 10 (2020-2023) found:")
print(response["total"]["value"])
Number of 10-K filings with Exhibit 10 (2020-2023) found:
923
import re

def is_exhibit_10(file):
    return bool(re.search(r"EX-10", file["type"]))

def get_exhibit_10_urls():
    exhibits = []
    has_more_filings = True
    query = 'formType:"10-K" AND documentFormatFiles.type:"EX-10" AND filedAt:[2020-01-01 TO 2023-12-31]'
    search_params = {
        "query": query,
        "from": 0,
        "size": 50,
        "sort": [{"filedAt": {"order": "desc"}}],
    }
    while has_more_filings:
        response = queryApi.get_filings(search_params)
        if len(response["filings"]) == 0:
            break
        for filing in response["filings"]:
            for file in filing["documentFormatFiles"]:
                if is_exhibit_10(file):
                    exhibits.append(
                        {
                            "accessionNo": filing["accessionNo"],
                            "filedAt": filing["filedAt"],
                            "companyName": filing["companyName"],
                            "ticker": filing["ticker"],
                            "cik": filing["cik"],
                            "exhibit10Url": file["documentUrl"],
                        }
                    )
        search_params["from"] += 50
    return pd.DataFrame(exhibits)
exhibit_10_files = get_exhibit_10_urls()
print("Exhibit 10 files:")
exhibit_10_files
Exhibit 10 files:
  | accessionNo | filedAt | companyName | ticker | cik | exhibit10Url
---|---|---|---|---|---|---
0 | 0000072331-23-000242 | 2023-12-20T17:04:04-05:00 | NORDSON CORP | NDSN | 72331 | https://www.sec.gov/Archives/edgar/data/72331/... |
1 | 0000072331-23-000242 | 2023-12-20T17:04:04-05:00 | NORDSON CORP | NDSN | 72331 | https://www.sec.gov/Archives/edgar/data/72331/... |
2 | 0000072331-23-000242 | 2023-12-20T17:04:04-05:00 | NORDSON CORP | NDSN | 72331 | https://www.sec.gov/Archives/edgar/data/72331/... |
3 | 0000072331-23-000242 | 2023-12-20T17:04:04-05:00 | NORDSON CORP | NDSN | 72331 | https://www.sec.gov/Archives/edgar/data/72331/... |
4 | 0001437749-23-034783 | 2023-12-18T17:26:13-05:00 | HOVNANIAN ENTERPRISES INC | HOV | 357294 | https://www.sec.gov/Archives/edgar/data/357294... |
... | ... | ... | ... | ... | ... | ... |
2796 | 0001133421-20-000006 | 2020-01-30T06:39:26-05:00 | NORTHROP GRUMMAN CORP /DE/ | NOC | 1133421 | https://www.sec.gov/Archives/edgar/data/113342... |
2797 | 0001067701-20-000008 | 2020-01-29T16:35:59-05:00 | UNITED RENTALS, INC. | URI | 1067701 | https://www.sec.gov/Archives/edgar/data/104716... |
2798 | 0001067701-20-000008 | 2020-01-29T16:35:59-05:00 | UNITED RENTALS NORTH AMERICA INC | URI | 1047166 | https://www.sec.gov/Archives/edgar/data/104716... |
2799 | 0001564590-20-002467 | 2020-01-29T16:32:43-05:00 | SYNNEX CORP | SNX | 1177394 | https://www.sec.gov/Archives/edgar/data/117739... |
2800 | 0001052918-20-000008 | 2020-01-10T13:18:02-05:00 | Timberline Resources Corp | TLRS | 1288750 | https://www.sec.gov/Archives/edgar/data/128875... |
2801 rows × 6 columns
exhibit_10_files['exhibit10Url'][:10].to_list()
['https://www.sec.gov/Archives/edgar/data/72331/000007233123000242/ex10-unordsonxformofstocko.htm',
'https://www.sec.gov/Archives/edgar/data/72331/000007233123000242/ex10-vnordsonxformofrestri.htm',
'https://www.sec.gov/Archives/edgar/data/72331/000007233123000242/ex10-wnoticeofawardpsufy24.htm',
'https://www.sec.gov/Archives/edgar/data/72331/000007233123000242/ex10-xrestrictedshareunita.htm',
'https://www.sec.gov/Archives/edgar/data/357294/000143774923034783/ex_606399.htm',
'https://www.sec.gov/Archives/edgar/data/357294/000143774923034783/ex_605281.htm',
'https://www.sec.gov/Archives/edgar/data/357294/000143774923034783/ex_605282.htm',
'https://www.sec.gov/Archives/edgar/data/357294/000143774923034783/ex_605283.htm',
'https://www.sec.gov/Archives/edgar/data/357294/000143774923034783/ex_605284.htm',
'https://www.sec.gov/Archives/edgar/data/357294/000143774923034783/ex_605285.htm']
Download Material Contracts as HTML and PDF Files
This final step illustrates how to download both HTML and PDF versions of material contracts disclosed in Exhibit 10. The Filing Download and PDF Generator APIs facilitate the file downloads, while `pandarallel` parallelizes the process, enabling concurrent downloads for improved speed.
The downloaded Exhibit 10 files are organized into a structured folder hierarchy as follows:
exhibit_10_files/<cik>/<accession_number>/<exhibit_filename>.(htm|pdf)
Example folder structure:
exhibit_10_files/
72331/
000007233123000242/
ex10-stock-options.htm
ex10-stock-options.pdf
ex10-share-unit-awards.htm
ex10-share-unit-awards.pdf
...
...
pip install pandarallel ipywidgets
import os
from pandarallel import pandarallel
from sec_api import RenderApi, PdfGeneratorApi
pandarallel.initialize(nb_workers=5, progress_bar=True)
renderApi = RenderApi(SEC_API_KEY)
pdfGeneratorApi = PdfGeneratorApi(SEC_API_KEY)
def download_exhibit_10_file(row):
    cik = row["cik"]
    accessionNo = row["accessionNo"]
    url = row["exhibit10Url"]
    file_name_html = url.split("/")[-1]
    file_name_pdf = file_name_html.replace(".htm", ".pdf")
    folder = f"exhibit_10_files/{cik}/{accessionNo}/"
    # exist_ok avoids a race condition between parallel workers
    os.makedirs(folder, exist_ok=True)
    exhibit_file_html = renderApi.get_file(url)
    exhibit_file_pdf = pdfGeneratorApi.get_pdf(url)
    # save HTML and PDF file
    with open(folder + file_name_html, "w") as file:
        file.write(exhibit_file_html)
    with open(folder + file_name_pdf, "wb") as file:
        file.write(exhibit_file_pdf)

# download the first 20 Exhibit 10 files
results = exhibit_10_files[:20].parallel_apply(download_exhibit_10_file, axis=1)
# uncomment to download all Exhibit 10 files
# results = exhibit_10_files.parallel_apply(download_exhibit_10_file, axis=1)
INFO: Pandarallel will run on 5 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.
Download Press Releases with Financial Results in Exhibit 99 from SEC 8-K Filings
This tutorial outlines the steps to download press releases (PRs) that disclose annual and quarterly financial results, filed as Exhibit 99 within SEC 8-K filings.
Steps for downloading PRs with financial results in Exhibit 99:
- Use the Query API to locate all 8-K filings that include press releases with financial results in Exhibit 99.
- Extract the URLs for each press release from the filing metadata and save them locally.
- Download the press releases using the list of URLs with the Download API.
!pip install -q sec-api
from sec_api import QueryApi
api_key = "YOUR_API_KEY"
queryApi = QueryApi(api_key)
Exhibit 99 in 8-K Filings
Press releases (PRs) are typically attached to 8-K filings as Exhibit 99. To identify all relevant 8-K filings containing PRs, it is necessary to filter for those that include Exhibit 99, excluding 8-Ks without it.
The example below shows the filing details page from an 8-K filing by Nvidia, which contains two Exhibit 99 files:
- Press release with financial results: Q4FY23 PRESS RELEASE (q4fy23pr.htm)
- CFO commentary: Q4FY23 CFO COMMENTARY (q4fy23cfocommentary.htm)
Types of Exhibit 99 Content in 8-K Filings
Exhibit 99 in 8-K filings includes a diverse range of material information, often beyond just press releases. Below is an overview of the types of content typically disclosed in Exhibit 99:
- Press releases about annual and quarterly financial results, management changes, and other material events
- CFO commentary on company performance
- Investor updates, such as slides presented at a conference with investors and analysts, health conference presentations, and general business updates
- Announcements of
  - Cash dividends
  - Share repurchase agreements and authorizations
  - Agreements and plans of merger and acquisition
  - Private offerings of convertible senior notes
  - Results of preclinical studies
  - Phase 1 clinical trial updates
  - Launches of clinical trials
  - New Drug Applications (NDAs)
- Letters of intent with counterparties
- Changes in management, such as the appointment of a new CFO or board member
- Receipt of a NASDAQ notification about non-compliance with the minimum bid price requirement
- Receipt of a Continued Listing Standard Notice from the NYSE
- Stockholder approval of amended Articles of Incorporation
- Letters to shareholders
- Conference posters
- Asset acquisition term sheets
- Reports of independent registered public accounting firms
- Notifications of total voting rights
- Material change reports
- Consulting agreements
- By-laws
- Board actions and resolutions
- Management resignation letters
- Results of annual meetings of stockholders
- Invitations to shareholder meetings
- Earnings call conference slides
8-K filings are used to inform investors about a multitude of material events, such as changes to management, board members, auditors, by-laws and more. Each event is categorized under one of 33 items, such as Item 9.01 Financial Statements and Exhibits which covers, among other things, quarterly or annual business performance updates. Refer to Supported 8-K Section Items for a complete list of all 8-K items.
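Because each filing's metadata contains an `items` list of strings such as `"Item 2.02: Results of Operations and Financial Condition"`, item-based filtering can also be replicated client-side. The `reports_financial_results` helper below is a hypothetical sketch of that check:

```python
def reports_financial_results(items):
    # Item 2.02 (results of operations) and Item 9.01 (financial statements
    # and exhibits) together signal an earnings-related 8-K
    has_2_02 = any(item.startswith("Item 2.02") for item in items)
    has_9_01 = any(item.startswith("Item 9.01") for item in items)
    return has_2_02 and has_9_01

items = [
    "Item 2.02: Results of Operations and Financial Condition",
    "Item 9.01: Financial Statements and Exhibits",
]
print(reports_financial_results(items))  # True
print(reports_financial_results(["Item 5.02: Departure of Directors or Certain Officers"]))  # False
```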
Find 8-K Filings with Exhibit 99
An 8-K filing reporting a management change (Item 5.02 Departure of Directors or Certain Officers) and one disclosing annual financial results (Item 2.02 Results of Operations and Financial Condition and Item 9.01 Financial Statements and Exhibits) can both include Exhibit 99 attachments. For this example, however, only 8-K filings with Exhibit 99 and items 2.02 and 9.01 are relevant.
The search criteria are as follows:
- `formType:"8-K"` to include both 8-K and 8-K/A filings,
- `documentFormatFiles.type:99` to focus only on exhibits of type `99` (e.g., `EX-99.1` and `EXHIBIT99`),
- `items:"2.02" AND items:"9.01"` to limit results to filings that include Item 2.02 Results of Operations and Financial Condition and Item 9.01 Financial Statements and Exhibits.
Combining these conditions with the `AND` operator results in the following query:
formType:"8-K" AND items:"2.02" AND items:"9.01" AND documentFormatFiles.type:(99, 99*, *99, *99*)
search_params = {
    "query": 'formType:"8-K" AND documentFormatFiles.type:(99, 99*, *99, *99*) AND items:"9.01" AND items:"2.02"',
    "from": "0",
    "size": "50",
    "sort": [{"filedAt": {"order": "desc"}}],
}

response = queryApi.get_filings(search_params)
response = queryApi.get_filings(search_params)
Let's convert the metadata of the first 50 matching filings into a pandas DataFrame.
import pandas as pd
# convert Query API response into a DataFrame
filings = pd.DataFrame.from_records(response['filings'])
print('Keys of the metadata for each filing')
print('---------------------------------')
print(*list(filings.keys()), sep='\n')
Keys of the metadata for each filing
---------------------------------
ticker
formType
accessionNo
cik
companyNameLong
companyName
linkToFilingDetails
description
linkToTxt
filedAt
documentFormatFiles
periodOfReport
entities
id
seriesAndClassesContractsInformation
items
linkToHtml
linkToXbrl
dataFiles
filings.head(3)
  | ticker | formType | accessionNo | cik | companyNameLong | companyName | linkToFilingDetails | description | linkToTxt | filedAt | documentFormatFiles | periodOfReport | entities | id | seriesAndClassesContractsInformation | items | linkToHtml | linkToXbrl | dataFiles
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | NUWE | 8-K | 0001140361-24-045016 | 1506492 | Nuwellis, Inc. (Filer) | Nuwellis, Inc. | https://www.sec.gov/Archives/edgar/data/150649... | Form 8-K - Current report - Item 2.02 Item 9.01 | https://www.sec.gov/Archives/edgar/data/150649... | 2024-11-01T16:45:31-04:00 | [{'sequence': '1', 'size': '29515', 'documentU... | 2024-11-01 | [{'fiscalYearEnd': '1231', 'stateOfIncorporati... | 05cca44e7a981e8c0640dafab4a50149 | [] | [Item 2.02: Results of Operations and Financia... | https://www.sec.gov/Archives/edgar/data/150649... | [{'sequence': '3', 'size': '3997', 'documentUr... | |
1 | CPSH | 8-K | 0001437749-24-033004 | 814676 | CPS TECHNOLOGIES CORP/DE/ (Filer) | CPS TECHNOLOGIES CORP/DE/ | https://www.sec.gov/Archives/edgar/data/814676... | Form 8-K - Current report - Item 2.02 Item 8.0... | https://www.sec.gov/Archives/edgar/data/814676... | 2024-11-01T16:40:51-04:00 | [{'sequence': '1', 'size': '24803', 'documentU... | 2024-11-01 | [{'fiscalYearEnd': '1228', 'stateOfIncorporati... | aee603751997acf7a312c9543c0e1f53 | [] | [Item 2.02: Results of Operations and Financia... | https://www.sec.gov/Archives/edgar/data/814676... | [{'sequence': '4', 'size': '3409', 'documentUr... | |
2 | INMB | 8-K | 0001213900-24-093531 | 1711754 | Inmune Bio, Inc. (Filer) | Inmune Bio, Inc. | https://www.sec.gov/Archives/edgar/data/171175... | Form 8-K - Current report - Item 2.02 Item 9.01 | https://www.sec.gov/Archives/edgar/data/171175... | 2024-11-01T16:23:48-04:00 | [{'sequence': '1', 'size': '24650', 'documentU... | 2024-10-31 | [{'fiscalYearEnd': '1231', 'stateOfIncorporati... | 8a3b08e2f061a3c4d0b97044c089fb76 | [] | [Item 2.02: Results of Operations and Financia... | https://www.sec.gov/Archives/edgar/data/171175... | [{'sequence': '4', 'size': '3018', 'documentUr... |
To extract URLs for all Exhibit 99 files from the `documentFormatFiles` list, iterate through each filing's metadata and filter the `documentFormatFiles` entries whose `type` contains "99". Since `type` values are not standardized, partial matching on "99" is necessary to capture variations like `EX-99.1`, `EX-99`, or `EX-99.01`.
documentFormatFiles = [doc for sublist in list(filings['documentFormatFiles']) for doc in sublist]
exhibit_99s = list(filter(lambda doc: '99' in doc['type'], documentFormatFiles))
exhibit_99s[:5]
[{'sequence': '2',
'size': '14187',
'documentUrl': 'https://www.sec.gov/Archives/edgar/data/1506492/000114036124045016/ef20037953_99-1.htm',
'description': 'EXHIBIT 99.1',
'type': 'EX-99.1'},
{'sequence': '2',
'size': '200055',
'documentUrl': 'https://www.sec.gov/Archives/edgar/data/814676/000143774924033004/ex_741572.htm',
'description': 'EXHIBIT 99.1 PRESS RELEASE',
'type': 'EX-99.1'},
{'sequence': '3',
'size': '184062',
'documentUrl': 'https://www.sec.gov/Archives/edgar/data/814676/000143774924033004/ex_741707.htm',
'description': 'EXHIBIT 99.2',
'type': 'EX-99.2'},
{'sequence': '2',
'size': '100966',
'documentUrl': 'https://www.sec.gov/Archives/edgar/data/1711754/000121390024093531/ea021928902ex99-1_inmune.htm',
'description': 'PRESS RELEASE OF INMUNE BIO INC., DATED OCTOBER 31, 2024',
'type': 'EX-99.1'},
{'sequence': '2',
'size': '66514',
'documentUrl': 'https://www.sec.gov/Archives/edgar/data/1538822/000110465924113497/tm2427172d1_ex99-1.htm',
'description': 'EXHIBIT 99.1',
'type': 'EX-99.1'}]
With the logic in place to locate metadata for relevant 8-K filings and extract URLs for Exhibit 99 files, the next step is to create a function, `download_metadata(start_year, end_year)`, which encapsulates all necessary steps and executes them over a specified range of years. The function returns a DataFrame with the URLs of all Exhibit 99 files from the selected 8-K filings. The results are saved to the CSV file `exhibit-99-8k-filings-metadata.csv` for further processing.
from pathlib import Path

def download_metadata(start_year=2020, end_year=2023):
    output_file = "exhibit-99-8k-filings-metadata.csv"

    # return the cached metadata if it was already downloaded
    if Path(output_file).is_file():
        result = pd.read_csv(output_file)
        return result

    print("✅ Starting download process")

    frames = []

    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            # page through each month's results in batches of 50 filings
            for from_index in range(0, 9950, 50):
                date_range_query = f"filedAt:[{year}-{month:02d}-01 TO {year}-{month:02d}-31]"
                form_type_query = 'formType:"8-K"'
                document_format_query = "documentFormatFiles.type:(99, 99*, *99, *99*)"
                items_query = 'items:("9.01" AND "2.02")'
                query = (
                    form_type_query
                    + " AND "
                    + document_format_query
                    + " AND "
                    + items_query
                    + " AND "
                    + date_range_query
                )
                search_params = {
                    "query": query,
                    "from": from_index,
                    "size": "50",
                    "sort": [{"filedAt": {"order": "desc"}}],
                }
                response = queryApi.get_filings(search_params)
                if len(response["filings"]) == 0:
                    break
                filings = pd.DataFrame.from_records(response["filings"])
                documentFormatFiles = [
                    doc
                    for sublist in list(filings["documentFormatFiles"])
                    for doc in sublist
                ]
                exhibit_99s_list = list(
                    filter(lambda doc: "99" in doc["type"], documentFormatFiles)
                )
                exhibit_99s_df = pd.DataFrame.from_records(exhibit_99s_list)
                frames.append(exhibit_99s_df)
                print(
                    "Month {year}-{month:02d}, from {from_index} completed".format(
                        year=year, month=month, from_index=from_index
                    )
                )
        print("✅ Downloaded metadata for year", year)

    result = pd.concat(frames)
    result.to_csv(output_file, index=False)

    number_metadata_downloaded = len(result)
    print(
        "✅ Download completed. Metadata downloaded for {} filings.".format(
            number_metadata_downloaded
        )
    )
    return result
exhibit_99s = download_metadata(start_year=2023, end_year=2023)
✅ Starting download process
Month 2023-01, from 0 completed
Month 2023-02, from 0 completed
Month 2023-03, from 0 completed
Month 2023-04, from 0 completed
Month 2023-05, from 0 completed
Month 2023-06, from 0 completed
Month 2023-07, from 0 completed
Month 2023-08, from 0 completed
Month 2023-09, from 0 completed
Month 2023-10, from 0 completed
Month 2023-11, from 0 completed
Month 2023-12, from 0 completed
✅ Downloaded metadata for year 2023
✅ Download completed. Metadata downloaded for 720 filings.
print('Number of Exhibit 99 URLs found for 2023:', len(exhibit_99s))
exhibit_99s
Number of Exhibit 99 URLs found for 2023: 720
  | sequence | size | documentUrl | description | type
---|---|---|---|---|---
0 | 2 | 76339 | https://www.sec.gov/Archives/edgar/data/117515... | EXHIBIT 99.1 | EX-99.1 |
1 | 2 | 310936 | https://www.sec.gov/Archives/edgar/data/130221... | EX-99.1 | EX-99.1 |
2 | 2 | 4285072 | https://www.sec.gov/Archives/edgar/data/103754... | EX-99.1 | EX-99.1 |
3 | 3 | 274620 | https://www.sec.gov/Archives/edgar/data/103754... | EX-99.2 | EX-99.2 |
4 | 2 | 4285072 | https://www.sec.gov/Archives/edgar/data/103754... | EX-99.1 | EX-99.1 |
... | ... | ... | ... | ... | ... |
54 | 3 | 24419 | https://www.sec.gov/Archives/edgar/data/122338... | EX-99.2 | EX-99.2 |
55 | 2 | 90940 | https://www.sec.gov/Archives/edgar/data/171662... | PRESS RELEASE | EX-99.1 |
56 | 4 | 29311 | https://www.sec.gov/Archives/edgar/data/730255... | EX-99.1 | EX-99.1 |
57 | 2 | 5475 | https://www.sec.gov/Archives/edgar/data/706129... | EX-99.1 | EX-99.1 |
58 | 2 | 103416 | https://www.sec.gov/Archives/edgar/data/779544... | EX-99.1 | EX-99.1 |
720 rows × 5 columns
Download Press Releases from Exhibit 99 as HTML and PDF
With all Exhibit 99 URLs collected, the next step is to download the press releases in both HTML and PDF formats. The Download API retrieves the original HTML content, while the PDF Generator API converts this HTML content into PDF format. The downloaded files are organized using the following folder structure:
exhibit-99-files/
- <cik>/
- <accessionNo>-<fileName>.htm
- <accessionNo>-<fileName>.pdf
- ...
- ...
The `download_exhibit(metadata)` function handles the folder creation for each company's CIK and downloads each exhibit into its respective folder. Files are named according to the pattern `<accessionNo>-<fileName>.htm` and `<accessionNo>-<fileName>.pdf`, where `accessionNo` and `fileName` are derived from the metadata.
from sec_api import RenderApi, PdfGeneratorApi
import os

renderApi = RenderApi(api_key)
pdfGeneratorApi = PdfGeneratorApi(api_key)

def download_exhibit(metadata):
    url = metadata["documentUrl"].replace("ix?doc=/", "")
    try:
        # the CIK and the accession number are the numeric parts of the URL path
        numeric_parts = [part for part in url.split("/") if part.isdigit()]
        cik = numeric_parts[0]
        accession_number = numeric_parts[1]
        new_folder = "./exhibit-99-files/" + cik
        os.makedirs(new_folder, exist_ok=True)
        file_content = renderApi.get_filing(url)
        file_content_pdf = pdfGeneratorApi.get_pdf(url)
        file_name = accession_number + "-" + url.split("/")[-1]
        file_name_pdf = file_name.replace(".htm", ".pdf")
        with open(new_folder + "/" + file_name, "w") as f:
            f.write(file_content)
        with open(new_folder + "/" + file_name_pdf, "wb") as f:
            f.write(file_content_pdf)
    except Exception as e:
        print(f"❌ download failed: {url} ({e})")
To parallelize the exhibit download process with `pandarallel`, configure it to use four workers, enabling concurrent downloads of four exhibits at a time. The `download_exhibit` function is then applied to each row of the `exhibit_99s` DataFrame using the `.parallel_apply` method from `pandarallel`.
!pip install -q pandarallel
from pandarallel import pandarallel
number_of_workers = 4
pandarallel.initialize(progress_bar=True, nb_workers=number_of_workers, verbose=0)
# run a quick test and download 50 exhibits
sample = exhibit_99s.head(50)
sample.parallel_apply(download_exhibit, axis=1)
# uncomment to download all exhibits
# exhibit_99s.parallel_apply(download_exhibit, axis=1)
print('✅ Download completed')
✅ Download completed