!pip install -q sec-api pandas

import os

import pandas as pd
from sec_api import QueryApi, RenderApi

# Read the key from the SEC_API_KEY environment variable when it is set, so the
# real key does not have to be pasted into (and saved with) the notebook.
# Falls back to the placeholder, which can still be edited by hand.
API_KEY = os.environ.get("SEC_API_KEY", "YOUR_API_KEY")

# Shared clients: queryApi is used to search filing metadata, renderApi to
# download the exhibit files themselves.
queryApi = QueryApi(API_KEY)
renderApi = RenderApi(API_KEY)

# Sample search: Form 8-K filings from 2020 that list a document whose type
# matches *99*.
lucene_query = (
    'formType:"8-K" '
    "AND documentFormatFiles.type:*99* "
    "AND filedAt:[2020-01-01 TO 2020-12-31]"
)

query = {
    "query": lucene_query,
    # start record to fetch; used for pagination
    "from": "0",
    # number of records to return (maximum is 50)
    "size": "10",
    # newest filings first
    "sort": [{"filedAt": {"order": "desc"}}],
}

response = queryApi.get_filings(query)

total_hits = response["total"]
print(f"Number of Form 8-K filings with exhibit 99 in 2020: {total_hits}")

Number of Form 8-K filings with exhibit 99 in 2020: {'value': 10000, 'relation': 'gte'}

# Load the filing metadata returned by the sample query into a DataFrame and
# preview the columns relevant to locating exhibit files.
filings = pd.DataFrame(response["filings"])

preview_columns = [
    "accessionNo",
    "filedAt",
    "companyName",
    "cik",
    "ticker",
    "items",
    "documentFormatFiles",
]
filings[preview_columns].head()

# Show the first three documentFormatFiles entries of the filing at index 0.
filings["documentFormatFiles"][0][:3]

[{'sequence': '1',  'size': '49284',  'documentUrl': 'https://www.sec.gov/Archives/edgar/data/314227/000165495420014094/tomi_8k.htm',  'description': 'CURRENT REPORT',  'type': '8-K'}, {'sequence': '2',  'size': '24539',  'documentUrl': 'https://www.sec.gov/Archives/edgar/data/314227/000165495420014094/tomi_ex991.htm',  'description': 'PRESENTATION',  'type': 'EX-99.1'}, {'sequence': '3',  'size': '35390',  'documentUrl': 'https://www.sec.gov/Archives/edgar/data/314227/000165495420014094/tomi_ex991000.jpg',  'description': 'IMAGE',  'type': 'GRAPHIC'}]

def extract_ex_99_urls(row):
    """Collect one record per exhibit 99 file attached to a filing.

    Parameters:
    - row (pd.Series or dict): Filing metadata. Reads the keys "filedAt",
      "accessionNo", "cik", "ticker" and "documentFormatFiles".

    Returns:
    - list: One dict per file whose type contains "EX-99", with the keys
      "filedAt", "accessionNo", "cik", "ticker", "type" and "exhibit99Url".
      Empty if the filing has no such file.
    """
    return [
        {
            "filedAt": row["filedAt"],
            "accessionNo": row["accessionNo"],
            "cik": row["cik"],
            "ticker": row["ticker"],
            "type": file["type"],
            "exhibit99Url": file["documentUrl"],
        }
        for file in row["documentFormatFiles"]
        # a file entry without a usable "type" is treated as "not an exhibit 99"
        if "EX-99" in (file.get("type") or "")
    ]


exhibit_99_urls = filings.apply(extract_ex_99_urls, axis=1)

# explode() yields NaN for a filing whose list is empty; drop those
# placeholders so only real records are turned into DataFrame rows.
exhibit_99_urls = pd.DataFrame(exhibit_99_urls.explode().dropna().to_list())
exhibit_99_urls

!pip install -q pandarallel ipywidgets

# pandarallel provides the `parallel_apply` method used further below.
from pandarallel import pandarallel

# Configure 10 workers and no progress bar.
pandarallel.initialize(nb_workers=10, progress_bar=False)

INFO: Pandarallel will run on 10 workers.INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.

"""fetch_exhibit_99_urls(query, retry_counter=0)    Fetches the exhibit 99 URLs for a given Query API query.    Parameters:    - query (dict): The Query API query to be used to fetch filing metadata and exhibit 99 URLs.    - retry_counter (int): The number of times the function has retried to fetch data.    Returns:    - list: A list of exhibit 99 URLs for the filings returned by the query."""def fetch_exhibit_99_urls(query, retry_counter=0):    try:        response = queryApi.get_filings(query)    except Exception as e:        if retry_counter < 3:            print(f"Retrying... {retry_counter}")            return fetch_exhibit_99_urls(query, retry_counter + 1)        else:            print(f"Failed to fetch data after {retry_counter} retries")            return []    if len(response["filings"]) == 0:        return []    filings = pd.DataFrame(response["filings"])    return filings.apply(lambda row: extract_ex_99_urls(row), axis=1).explode().to_list()

"""fetch_all_exhibit_99_urls(start_year, end_year)    Fetches all exhibit 99 URLs of Form 8-K filings for the specified range of years.    Parameters:    - start_year (int): The start year of the range (inclusive).    - end_year (int): The end year (inclusive).    Returns:    - list: A list of dictionaries containing the exhibit 99 URLs."""def fetch_all_exhibit_99_urls(start_year, end_year):    if start_year > end_year:        raise ValueError("start_year must be less than or equal to end_year")    all_exhibit_99_urls = []    for year in range(start_year, end_year + 1):        print(f"Fetching exhibit 99 URLs for year {year}")        for month in range(1, 13):            print(f"  Processing month: {month}")            queries = []            query_from = 0            form_type_filter = 'formType:"8-K"'            file_filter = "documentFormatFiles.type:*99*"            date_filter = f"filedAt:[{year}-{month:02d}-01 TO {year}-{month:02d}-31]"            lucene_query = f"{form_type_filter} AND {file_filter} AND {date_filter}"            query = {                "query": lucene_query,                "from": query_from,                "size": "50",                "sort": [{"filedAt": {"order": "desc"}}],            }            response = queryApi.get_filings(query)            total_filings = response["total"]["value"]            print(f"  Found {total_filings} filings in {year}-{month:02d}")            if total_filings == 0:                continue            # create queries, each query with a from value of 50, 100, 150, etc.            
for i in range(0, total_filings, 50):                queries.append(                    {                        "query": {                            "query": lucene_query,                            "from": i,                            "size": "50",                            "sort": [{"filedAt": {"order": "desc"}}],                        }                    }                )            queries = pd.DataFrame(queries)            # use pandarallel to parallelize the fetching of exhibit 99 URLs            exhibit_99_urls = queries["query"].parallel_apply(fetch_exhibit_99_urls)            all_exhibit_99_urls.extend(exhibit_99_urls)    # flatten, filter, and sort the exhibit 99 URLs    all_exhibit_99_urls_flat = [item for sublist in all_exhibit_99_urls for item in sublist]    all_exhibit_99_urls_flat = [item for item in all_exhibit_99_urls_flat if type(item) == dict]    all_exhibit_99_urls_df = pd.DataFrame(all_exhibit_99_urls_flat)    all_exhibit_99_urls_df["filedAt"] = pd.to_datetime(all_exhibit_99_urls_df["filedAt"], utc=True)    all_exhibit_99_urls_df["filedAt"] = all_exhibit_99_urls_df["filedAt"].dt.tz_convert("America/New_York")    all_exhibit_99_urls_df = all_exhibit_99_urls_df.sort_values("filedAt", ascending=True)    return all_exhibit_99_urls_dfexhibit_99_urls_2020 = fetch_all_exhibit_99_urls(2020, 2020)

Fetching exhibit 99 URLs for year 2020  Processing month: 1

  Found 3068 filings in 2020-01  Processing month: 2  Found 4037 filings in 2020-02  Processing month: 3  Found 3762 filings in 2020-03  Processing month: 4  Found 4023 filings in 2020-04  Processing month: 5  Found 4768 filings in 2020-05  Processing month: 6  Found 2673 filings in 2020-06  Processing month: 7  Found 3688 filings in 2020-07  Processing month: 8  Found 4376 filings in 2020-08  Processing month: 9  Found 2489 filings in 2020-09  Processing month: 10  Found 3899 filings in 2020-10  Processing month: 11  Found 4426 filings in 2020-11  Processing month: 12  Found 2762 filings in 2020-12

# Report how many exhibit 99 records were collected for 2020, then show the
# DataFrame itself.
print(f"{len(exhibit_99_urls_2020):,} exhibit 99 URLs fetched for 2020")
exhibit_99_urls_2020

53,166 exhibit 99 URLs fetched for 2020

import os

# Re-initialize pandarallel, this time with a progress bar for the downloads.
pandarallel.initialize(nb_workers=10, progress_bar=True)


def download_ex_99_file(row, retry_counter=0):
    """Download one exhibit 99 file and store it on disk.

    The file is written to
    ex-99-files/<year>/<month>/<cik>/<accessionNo>_<original file name>,
    where year and month are taken from the row's "filedAt" value.

    Parameters:
    - row (pd.Series): Reads "accessionNo", "cik", "exhibit99Url" and
      "filedAt"; "filedAt" must expose .year and .month.
    - retry_counter (int): The number of times the download has already been
      retried. Callers normally leave this at 0.

    Returns:
    - None. A download that still fails after 3 retries is reported with a
      "Failed: <url>" message and skipped.
    """
    exhibit_99_url = row["exhibit99Url"]

    try:
        content = renderApi.get_filing(exhibit_99_url)
    except Exception:
        # Best effort: retry up to 3 times, then skip this file so one bad
        # URL does not abort the whole batch.
        if retry_counter < 3:
            return download_ex_99_file(row, retry_counter + 1)
        print(f"Failed: {exhibit_99_url}")
        return

    accession_no = row["accessionNo"]
    cik = row["cik"]
    exhibit_99_filename = exhibit_99_url.split("/")[-1]
    publication_year = row["filedAt"].year
    publication_month = row["filedAt"].month

    # Prefix with the accession number so equally named exhibits of different
    # filings by the same company do not overwrite each other.
    file_name = f"{accession_no}_{exhibit_99_filename}"
    file_path = (
        f"ex-99-files/{publication_year}/{publication_month:02d}/{cik}/{file_name}"
    )

    # Create the target folder only once there is content to write, so failed
    # downloads do not leave empty directories behind.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    with open(file_path, "wb") as f:
        f.write(content.encode("utf-8"))


# download sample of 1000
exhibit_99_urls_2020[:1000].parallel_apply(download_ex_99_file, axis=1)

# download all
# exhibit_99_urls_2020.parallel_apply(download_ex_99_file, axis=1)

print("Download complete")

INFO: Pandarallel will run on 10 workers.INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%'), Label(value='0 / 100'))), HBox(childr…

Download complete

	accessionNo	filedAt	companyName	cik	ticker	items	documentFormatFiles
0	0001654954-20-014094	2020-12-31T17:20:28-05:00	TOMI Environmental Solutions, Inc.	314227	TOMZ	[Item 5.07: Submission of Matters to a Vote of...	[{'sequence': '1', 'size': '49284', 'documentU...
1	0001104659-20-141038	2020-12-31T17:10:56-05:00	GOLD RESOURCE CORP	1160791	GORO	[Item 5.02: Departure of Directors or Certain ...	[{'sequence': '1', 'size': '40187', 'documentU...
2	0001104659-20-141037	2020-12-31T17:08:55-05:00	McEwen Mining Inc.	314203	MUX	[Item 3.02: Unregistered Sales of Equity Secur...	[{'sequence': '1', 'size': '27630', 'documentU...
3	0001640334-20-003199	2020-12-31T17:07:50-05:00	Lexaria Bioscience Corp.	1348362	LEXX	[Item 7.01: Regulation FD Disclosure, Item 9.0...	[{'sequence': '1', 'size': '16594', 'documentU...
4	0001493152-20-024694	2020-12-31T17:04:37-05:00	MONMOUTH REAL ESTATE INVESTMENT CORP	67625	MNR.PC	[Item 7.01: Regulation FD Disclosure, Item 8.0...	[{'sequence': '1', 'size': '41725', 'documentU...

	filedAt	accessionNo	cik	ticker	type	exhibit99Url
0	2020-12-31T17:20:28-05:00	0001654954-20-014094	314227	TOMZ	EX-99.1	https://www.sec.gov/Archives/edgar/data/314227...
1	2020-12-31T17:10:56-05:00	0001104659-20-141038	1160791	GORO	EX-99.1	https://www.sec.gov/Archives/edgar/data/116079...
2	2020-12-31T17:08:55-05:00	0001104659-20-141037	314203	MUX	EX-99.1	https://www.sec.gov/Archives/edgar/data/314203...
3	2020-12-31T17:08:55-05:00	0001104659-20-141037	314203	MUX	EX-99.2	https://www.sec.gov/Archives/edgar/data/314203...
4	2020-12-31T17:07:50-05:00	0001640334-20-003199	1348362	LEXX	EX-99.1	https://www.sec.gov/Archives/edgar/data/134836...
5	2020-12-31T17:04:37-05:00	0001493152-20-024694	67625	MNR.PC	EX-99.1	https://www.sec.gov/Archives/edgar/data/67625/...
6	2020-12-31T17:01:35-05:00	0001469709-20-000101	1647705	GBBT	EX-99.1	https://www.sec.gov/Archives/edgar/data/164770...
7	2020-12-31T17:01:35-05:00	0001469709-20-000101	1647705	GBBT	EX-99.2	https://www.sec.gov/Archives/edgar/data/164770...
8	2020-12-31T17:00:11-05:00	0001104659-20-141025	1815903	PTPI	EX-99.1	https://www.sec.gov/Archives/edgar/data/181590...
9	2020-12-31T17:00:11-05:00	0001104659-20-141025	1815903	PTPI	EX-99.2	https://www.sec.gov/Archives/edgar/data/181590...
10	2020-12-31T17:00:10-05:00	0001477932-20-007599	1281984	WDLF	EX-99.1	https://www.sec.gov/Archives/edgar/data/128198...
11	2020-12-31T16:55:00-05:00	0001104659-20-141020	837852	IDEX	EX-99.1	https://www.sec.gov/Archives/edgar/data/837852...
12	2020-12-31T16:43:06-05:00	0001580695-20-000463	1372183	NXTP	EX-99.1	https://www.sec.gov/Archives/edgar/data/137218...
13	2020-12-31T16:43:06-05:00	0001580695-20-000463	1372183	NXTP	EX-99.2	https://www.sec.gov/Archives/edgar/data/137218...

	filedAt	accessionNo	cik	ticker	type	exhibit99Url
3726	2020-01-02 06:03:38-05:00	0001104659-20-000041	1526113	GNL	EX-99.1	https://www.sec.gov/Archives/edgar/data/152611...
3725	2020-01-02 06:04:33-05:00	0001104659-20-000050	1568162	RTL	EX-99.1	https://www.sec.gov/Archives/edgar/data/156816...
3724	2020-01-02 06:38:00-05:00	0000052795-20-000004	52795	AXE	EX-99.2	https://www.sec.gov/Archives/edgar/data/52795/...
3723	2020-01-02 06:38:00-05:00	0000052795-20-000004	52795	AXE	EX-99.1	https://www.sec.gov/Archives/edgar/data/52795/...
3722	2020-01-02 06:41:40-05:00	0001193125-20-000100	1337553	AERI	EX-99.1	https://www.sec.gov/Archives/edgar/data/133755...
...	...	...	...	...	...	...
49778	2020-12-31 17:07:50-05:00	0001640334-20-003199	1348362	LEXX	EX-99.1	https://www.sec.gov/Archives/edgar/data/134836...
49777	2020-12-31 17:08:55-05:00	0001104659-20-141037	314203	MUX	EX-99.2	https://www.sec.gov/Archives/edgar/data/314203...
49776	2020-12-31 17:08:55-05:00	0001104659-20-141037	314203	MUX	EX-99.1	https://www.sec.gov/Archives/edgar/data/314203...
49775	2020-12-31 17:10:56-05:00	0001104659-20-141038	1160791	GORO	EX-99.1	https://www.sec.gov/Archives/edgar/data/116079...
49774	2020-12-31 17:20:28-05:00	0001654954-20-014094	314227	TOMZ	EX-99.1	https://www.sec.gov/Archives/edgar/data/314227...

How to Download Exhibit 99 Files from Form 8-K Filings

Tutorial Overview

What is Exhibit 99?

Example Exhibit 99 Files

Setup

Find URLs of Exhibit 99 Files

Download Exhibit 99 Files