{  "query": "formType:"13F" AND filedAt:[YEAR-MONTH-01 TO YEAR-MONTH-31]",  "from": "0",  "size": "50",  "sort": [{"filedAt": {"order": "desc"}}]}

import pandas as pdimport numpy as npimport matplotlib.pyplot as pltimport matplotlib.style as styleimport matplotlib.ticker as mtickimport textwrapfrom functools import wrapsstyle.use("default")params = {    "axes.labelsize": 8,    "font.size": 8,    "legend.fontsize": 8,    "xtick.labelsize": 8,    "ytick.labelsize": 8,    "text.usetex": False,    "font.family": "sans-serif",}plt.rcParams.update(params)def round_decorator(decimals):    def decorator(func):        def wrapper(*args, **kwargs):            result = func(*args, **kwargs)            return round(result, decimals)        return wrapper    return decoratordef memoize(func):    cache = {}    @wraps(func)    def wrapper(*args, **kwargs):        # convert pd.Series/list to a hashable tuple for caching        hashable_args = tuple(            arg if not isinstance(arg, (pd.Series, list)) else tuple(arg)            for arg in args        )        hashable_kwargs = {            k: v if not isinstance(v, (pd.Series, list)) else tuple(v)            for k, v in kwargs.items()        }        key = (hashable_args, tuple(sorted(hashable_kwargs.items())))        if key not in cache:            cache[key] = func(*args, **kwargs)        return cache[key]    return wrapper

funds_filings = pd.read_csv('./data/13f-filings-metadata.csv')funds_filings

funds_per_year_stats = (    funds_filings.drop_duplicates(subset=["cik", "year"])    .groupby(funds_filings["year"])["cik"]    .count()    .to_frame()    .reset_index()    .rename(columns={"filedAt": "year", "cik": "count"}))print("Number of active funds per year (2014 - 2023):")funds_per_year_stats[-10:]

Number of active funds per year (2014 - 2023):

fig, ax = plt.subplots(figsize=(4, 3))ax.plot(funds_per_year_stats["year"], funds_per_year_stats["count"], color="black")ax.set_xticks(funds_per_year_stats["year"])ax.set_xticklabels(funds_per_year_stats["year"], rotation=90)ax.yaxis.grid(True, linestyle="--", which="major", color="grey", alpha=0.25)ax.spines["top"].set_visible(False)ax.spines["right"].set_visible(False)ax.set_ylabel("Investment Managers")ax.get_yaxis().set_major_formatter(    mtick.FuncFormatter(lambda x, p: format(int(x), ",")))ax.axvspan(1999.5, 2001.5, alpha=0.1, color="red", zorder=-100)ax.axvspan(2006.5, 2008.5, alpha=0.1, color="red", zorder=-100)ax.axvspan(2019.5, 2021.5, alpha=0.1, color="red", zorder=-100)ax.text(    2000.5,    ax.get_ylim()[1] - 400,    "Dot-comBubble",    horizontalalignment="center",    verticalalignment="center",    color="red",    alpha=0.5,)ax.text(    2007.5,    ax.get_ylim()[1] - 400,    "GFC",    horizontalalignment="center",    verticalalignment="center",    color="red",    alpha=0.5,)ax.text(    2020.5,    ax.get_ylim()[1] - 400,    "COVID",    horizontalalignment="center",    verticalalignment="center",    color="red",    alpha=0.5,)caption = (    "Figure: Active Institutional Investment Managers per Year. The figure shows the number of institutional investment managers that filed at least one 13F-HR form in a given year. "    + "Managers represent funds such as Bridgewater Associates or Pershing Square. Funds are counted based on their CIK and the year a corresponding 13F was filed.")caption = "".join(textwrap.wrap(caption, 60))fig.text(0.5, -0.30, caption, ha="center", fontsize=9)plt.tight_layout()plt.show()

total_number_of_funds = funds_filings["cik"].nunique()print("Total number of funds (1998 - 2023):", format(total_number_of_funds, ","))

Total number of funds (1998 - 2023): 13,035

funds_per_year = (    funds_filings[["cik", "year"]]    .copy()    .drop_duplicates(subset=["cik", "year"])    .sort_values(by=["year"])    .reset_index(drop=True))entering_and_exiting_funds = [    # {    #   "year": 2013,    #   "entering": 210,    #   "exiting": 10    # },    # {    #   "year": 2014,    #   "entering": 41,    #   "exiting": 104    # }    # ...]for year in funds_per_year["year"].unique():    funds_year = funds_per_year[funds_per_year["year"] == year]    stats = {"year": year, "entering": 0, "exiting": 0}    if len(entering_and_exiting_funds) == 0:        stats["entering"] = len(funds_year["cik"])        entering_and_exiting_funds.append(stats)        continue    funds_prev_year = funds_per_year[funds_per_year["year"] == year - 1]    # calculate number of CIKs that are in funds_year, but not in funds_prev_year    # these are new funds    new_funds = funds_year[~funds_year["cik"].isin(funds_prev_year["cik"])]    # calculate number of CIKs that are in funds_prev_year, but not in funds_year    # these are funds that stopped filing    funds_that_stopped_filing = funds_prev_year[        ~funds_prev_year["cik"].isin(funds_year["cik"])    ]    stats["entering"] = len(new_funds["cik"])    stats["exiting"] = len(funds_that_stopped_filing["cik"])    entering_and_exiting_funds.append(stats)entering_and_exiting_funds = pd.DataFrame(entering_and_exiting_funds)entering_and_exiting_funds.sort_values(by=["year"], inplace=True)print("Number of entering and exiting funds per year")entering_and_exiting_funds[-10:]

Number of entering and exiting funds per year

fig, ax = plt.subplots(figsize=(4, 3))ax.bar(    entering_and_exiting_funds["year"][1:],    entering_and_exiting_funds["entering"][1:],    color="black",    label="New Funds",    zorder=100,)ax.bar(    entering_and_exiting_funds["year"][1:],    entering_and_exiting_funds["exiting"][1:] * -1,    color="grey",    label="Funds that Stopped Filing",    zorder=100,)ax.get_yaxis().set_major_formatter(    mtick.FuncFormatter(lambda x, p: format(int(x), ",")))ax.set_xticks(entering_and_exiting_funds["year"][1:])ax.set_xticklabels(entering_and_exiting_funds["year"][1:], rotation=90)ax.yaxis.grid(True, linestyle="--", which="major", color="grey", alpha=0.25)ax.spines["top"].set_visible(False)ax.spines["right"].set_visible(False)# add text "New Funds" onto the figureax.text(    2005,    680,    "New Funds",    horizontalalignment="center",    verticalalignment="center",    color="black",    fontsize=8,)ax.text(    2005,    -400,    "Exiting Funds",    horizontalalignment="center",    verticalalignment="center",    color="black",    fontsize=8,)ax.axvspan(1999.5, 2001.5, alpha=0.1, color="red", zorder=-100)ax.axvspan(2006.5, 2008.5, alpha=0.1, color="red", zorder=-100)ax.axvspan(2019.5, 2021.5, alpha=0.1, color="red", zorder=-100)# add text labels for financial crisis and covidax.text(    2002,    ax.get_ylim()[1] - 200,    "Dot-comBubble",    horizontalalignment="center",    verticalalignment="center",    color="red",    alpha=0.5,)ax.text(    2007.5,    ax.get_ylim()[1] - 200,    "GFC",    horizontalalignment="center",    verticalalignment="center",    color="red",    alpha=0.5,)ax.text(    2020.5,    ax.get_ylim()[1] - 200,    "COVID",    horizontalalignment="center",    verticalalignment="center",    color="red",    alpha=0.5,)ax.set_ylabel("Investment Managers - Entering/Exiting")caption = "Figure: Number of New and Exiting Institutional Investment Managers per Year. The figure shows the number of new and exiting funds per year. A new fund is defined as a fund that filed a 13F-HR form for the first time in a given year. A fund that stopped filing is defined as a fund that filed a 13F-HR form in the previous year, but not in the current year. Funds are counted based on their CIK."caption = "".join(textwrap.wrap(caption, 60))fig.text(0.5, -0.40, caption, ha="center", fontsize=9)plt.show()

cik_of_early_funds = funds_filings[funds_filings['year'].isin([1998,1999,2000])]['cik'].unique()cik_of_active_funds = funds_filings[funds_filings['year'].isin([2023])]['cik'].unique()# create intersection of cik_of_early_funds and cik_of_active_fundscik_of_early_funds = set(cik_of_early_funds)cik_of_active_funds = set(cik_of_active_funds)intersection = cik_of_early_funds.intersection(cik_of_active_funds)intersection_pct = round(len(intersection) / len(cik_of_early_funds) * 100, 2)print("Number of funds that were active in...")print("1998, 1999 and 2000:\t\t", len(cik_of_early_funds))print("1998, 1999, 2000 and 2023:\t", len(intersection), f"({intersection_pct}% survival rate)")

Number of funds that were active in...1998, 1999 and 2000:   22561998, 1999, 2000 and 2023:  659 (29.21% survival rate)

ciks = funds_filings["cik"].unique()years_active = []for cik in ciks:    years = funds_filings[funds_filings["cik"] == cik]["year"].unique()    years_active.append({"cik": cik, "years_active": len(years), "years": years})years_active = pd.DataFrame(years_active)fig, ax = plt.subplots(figsize=(4, 3))bins_range = range(1, 27)ax.hist(years_active["years_active"], bins=bins_range, color="black")# move all bars by 0.5 to the leftax.set_xticks(np.array(bins_range) - 0.5)ax.set_xticklabels(bins_range, rotation=90, fontsize=8, ha="center")ax.yaxis.grid(True, linestyle="--", which="major", color="grey", alpha=0.25)ax.spines["top"].set_visible(False)ax.spines["right"].set_visible(False)ax.set_xlabel("Years Active")ax.set_ylabel("Number of Funds")caption = (    "Figure: Distribution of Life Span of Funds (1998 - 2023). The figure shows the distribution of "    + "the number of years a fund was active. A fund is considered active if it filed a 13F-HR form "    + "in a given year. Funds are counted based on their CIK.")caption = "".join(textwrap.wrap(caption, 60))fig.text(0.5, -0.30, caption, ha="center", fontsize=9)plt.show()print("Descriptive Statistics of the Life Spans of Funds (1998 - 2023)")pd.DataFrame(years_active["years_active"].describe().round(2))

Descriptive Statistics of the Life Spans of Funds (1998 - 2023)

{  "formType": "13F-HR",  "accessionNo": "0001398344-23-022864",  "periodOfReport": "2023-09-30",  "documentFormatFiles": [    {      "sequence": "1",      "documentUrl": "https://www.sec.gov/Archives/edgar/data/2004720/000139834423022864/xslForm13F_X02/primary_doc.xml",      "type": "13F-HR"    }  ]}

cover_pages = pd.read_csv("./data/cover-pages-cleaned.csv", parse_dates=["periodOfReport", "filedAt"])print("13F Cover Pages (2013 - 2023)")cover_pages.sort_values(by=["filedAt"])

13F Cover Pages (2013 - 2023)

def plot_holding_values(cover_pages):    fig, ax = plt.subplots(figsize=(6, 3))    cover_pages_grouped = cover_pages.groupby("periodOfReport")    cover_pages_grouped = (        cover_pages_grouped["holdingsValueStandardized"].sum().to_frame().reset_index()    )    ax.plot(        cover_pages_grouped["periodOfReport"],        cover_pages_grouped["holdingsValueStandardized"],        color="black",    )    xticks = cover_pages_grouped["periodOfReport"].dt.date.unique().tolist()    ax.set_xticks(xticks)    ax.set_xticklabels(xticks, rotation=90)    ax.yaxis.grid(True, linestyle="--", which="major", color="grey", alpha=0.25)    ax.spines["top"].set_visible(False)    ax.spines["right"].set_visible(False)    # draw vertical lines for every Q1. e.g. 2013-03-31, 2014-03-31, 2015-03-31, ...    y_min = ax.get_ylim()[0]    for year in cover_pages_grouped["periodOfReport"].dt.year.unique():        x_value = str(year) + "-03-31"        x_value = pd.to_datetime(x_value, format="%Y-%m-%d")        if (x_value in cover_pages_grouped["periodOfReport"].values) == False:            continue        y_value = cover_pages_grouped[cover_pages_grouped["periodOfReport"] == x_value][            "holdingsValueStandardized"        ].values[0]        ax.vlines(            x=x_value,            ymin=y_min,            ymax=y_value,            color="grey",            alpha=0.25,            zorder=-100,            linestyle="--",            linewidth=0.8,        )    ax.set_ylabel("Value of all Holdings (USD, Billions)")    ax.get_yaxis().set_major_formatter(        mtick.FuncFormatter(lambda x, p: format(int(x) / 1000000000, ",.0f"))    )    ax.axvspan(        pd.to_datetime("2020-03-31", format="%Y-%m-%d"),        pd.to_datetime("2021-12-31", format="%Y-%m-%d"),        alpha=0.1,        color="red",        zorder=-100,    )    ax.text(        pd.to_datetime("2020-12-31", format="%Y-%m-%d"),        ax.get_ylim()[1] * 0.95,        "COVID",        horizontalalignment="center",        verticalalignment="center",        color="red",        alpha=0.5,    )    ax.set_title(        "Holding Values across all Investment Managers per Quarter (2013 - 2023)"    )    total_number_of_funds = cover_pages["filerCik"].nunique()    number_of_13F_filings = cover_pages["accessionNo"].nunique()    caption = (        "Figure: Total Value of all Holdings per Quarter. The figure shows the total dollar value of all holdings per quarterly reporting period (end of March, June, September and December). "        + "The value is calculated by summing the value of all holdings reported in a given quarter per investment manager. The value is standardized to USD billions. "        + f"A total of {format(total_number_of_funds, ',')} investment managers and {format(number_of_13F_filings, ',')} 13F filings are included in the figure. "    )    caption = "".join(textwrap.wrap(caption, 70))    fig.text(0.5, -0.35, caption, ha="center", fontsize=9)    # replace x labels with year and quarter    def get_quarter(period):        if period.month == 3:            return "Q1"        elif period.month == 6:            return "Q2"        elif period.month == 9:            return "Q3"        elif period.month == 12:            return "Q4"        else:            return "Q0"    def format_period(period):        qtr = get_quarter(period)        if qtr != "Q1":            return qtr        return str(period)[0:4] + " " + get_quarter(period)    ax.set_xticklabels(        [format_period(period) for period in xticks],        rotation=90,        ha="center",        fontsize=8,    )    plt.tight_layout()    plt.show()plot_holding_values(cover_pages)

def detect_outliers(df, column):    outliers_df = df.copy()    outliers_df["pctChange"] = outliers_df[column].pct_change()    outlier_threshold_1 = 20    outliers_df["outlier"] = outliers_df["pctChange"] > outlier_threshold_1    outliers_df["outlierPrevRow"] = (        outliers_df["outlier"].shift(1).fillna(False).astype(bool)    )    outliers_df["outlier"] = (outliers_df["outlier"] == False) & outliers_df[        "outlierPrevRow"    ] & (outliers_df["pctChange"] < 0.7) & (        outliers_df["pctChange"] > -0.5    ) | outliers_df[        "outlier"    ]    outliers_df["outlierPrevRow"] = (        outliers_df["outlier"].shift(1).fillna(False).astype(bool)    )    outliers_df["outlier"] = (outliers_df["pctChange"] < -0.96) & ~outliers_df[        "outlier"    ] & ~outliers_df["outlierPrevRow"] | outliers_df["outlier"]    outliers_df["outlierPrevRow"] = (        outliers_df["outlier"].shift(1).fillna(False).astype(bool)    )    return outliers_dfprint("Example of a fund (CIK: 1906802) with outliers:")outliers_example = detect_outliers(    cover_pages[cover_pages["filerCik"] == 1906802], "holdingsValueStandardized")outliers_example[    [        "periodOfReport",        "numberHoldings",        "holdingsValueReported",        "holdingsValueStandardized",        "outlier",    ]]

Example of a fund (CIK: 1906802) with outliers:

active_ciks_in_2023 = cover_pages[cover_pages["periodOfReport"].dt.year == 2023]['filerCik'].unique().tolist()active_ciks_wo_outliers = []active_ciks_with_outliers = []for cik in active_ciks_in_2023:    cik_df = cover_pages[cover_pages['filerCik'] == cik]    outliers = detect_outliers(cik_df, 'holdingsValueStandardized')    has_outliers = outliers['outlier'].any()    if has_outliers == False:        active_ciks_wo_outliers.append(cik)    else:        active_ciks_with_outliers.append(cik)pct_of_funds_w_outliers = round(len(active_ciks_with_outliers) / len(active_ciks_in_2023) * 100, 2)print("Active funds in 2023:", format(len(active_ciks_in_2023), ","))print("Active funds with outliers:", format(len(active_ciks_with_outliers), ","), f"({pct_of_funds_w_outliers}%)")print("Active funds without outliers:", format(len(active_ciks_wo_outliers), ","))

Active funds in 2023: 6,966Active funds with outliers: 814 (11.69%)Active funds without outliers: 6,152

from scipy import statsdef get_time_factor(return_period=None):    if not return_period in ["daily", "monthly", "quarterly", "Q", "yearly"]:        raise ValueError(            'return_period must be one of: ["daily", "weekly", "monthly", "quarterly", "yearly"]'        )    if return_period == "daily":        return 252    elif return_period == "monthly":        return 12    elif return_period == "quarterly" or return_period == "Q":        return 4    elif return_period == "yearly":        return 1@round_decorator(3)def calculate_annualized_returns(fund_cover_pages):    quarter = fund_cover_pages["periodOfReport"].dt.quarter.values[-1]    period_mask = fund_cover_pages["periodOfReport"].dt.quarter == quarter    holding_values = fund_cover_pages[period_mask]    annual_returns = (1 + holding_values["holdingsValueStandardized"].pct_change()) ** (        1 / 1    ) - 1    periods = holding_values["periodOfReport"]    return pd.DataFrame({"annual_returns": annual_returns, "periods": periods}).dropna()@round_decorator(3)def calculate_beta(portfolio_returns, benchmark_returns):    if len(portfolio_returns) == 0:        return np.nan    covariance = np.cov(portfolio_returns, benchmark_returns)[0][1]    benchmark_variance = np.var(benchmark_returns)    beta = covariance / benchmark_variance    return beta@round_decorator(3)def calculate_alpha(    portfolio_returns, benchmark_returns, risk_free_rate, beta, return_period):    if len(portfolio_returns) == 0:        return np.nan    time_factor = get_time_factor(return_period)    portfolio_annual_return = np.mean(portfolio_returns) * time_factor    benchmark_annual_return = np.mean(benchmark_returns) * time_factor    alpha = portfolio_annual_return - (        risk_free_rate + beta * (benchmark_annual_return - risk_free_rate)    )    return alpha@round_decorator(3)def calculate_sharpe_ratio(portfolio_returns, risk_free_rate, return_period):    if len(portfolio_returns) == 0:        return np.nan    time_factor = get_time_factor(return_period)    annualized_return = np.mean(portfolio_returns) * time_factor    annualized_std = np.std(portfolio_returns) * np.sqrt(time_factor)    sharpe_ratio = (annualized_return - risk_free_rate) / annualized_std    return sharpe_ratio@round_decorator(3)def calculate_sortino_ratio(    portfolio_returns, minimum_acceptable_return, return_period):    if len(portfolio_returns) == 0:        return np.nan    time_factor = get_time_factor(return_period)    # convert annualized MAR to a daily/weekly/quarterly rate    normalized_mar = (1 + minimum_acceptable_return) ** (1 / time_factor) - 1    # difference between portfolio returns and the MAR    excess_returns = portfolio_returns - normalized_mar    negative_excess_returns = excess_returns[excess_returns < 0]    if len(negative_excess_returns) == 0:        return np.nan    average_return = np.mean(portfolio_returns)    downside_deviation = np.std(negative_excess_returns)    annualized_return = average_return * time_factor    annualized_downside_deviation = downside_deviation * np.sqrt(time_factor)    if annualized_downside_deviation == 0:        return np.nan    sortino_ratio = (annualized_return - normalized_mar) / annualized_downside_deviation    return sortino_ratiodef calculate_t_stat(portfolio_returns, benchmark_returns):    t_statistic, p_value = stats.ttest_ind(portfolio_returns, benchmark_returns)    return t_statistic, p_value

import yfinance as yfdef get_spy_data():    start_date = cover_pages["periodOfReport"].min() - pd.Timedelta(days=3)    end_date = cover_pages["periodOfReport"].max() + pd.Timedelta(days=3)    print("Start date:", start_date)    print("End date:", end_date)    spy_data = yf.download(        "SPY",        start=start_date,        end=end_date,    )    # check for each portfolio_values["date"].dt.date.unique() if it exists in spy_data.index    # if not, check if next day exists in spy_data.index. if not, check if next next day exists in spy_data.index    # add all dates that exist to dates_filter    dates_filter = []    for date in pd.to_datetime(cover_pages["periodOfReport"].dt.date.unique().tolist()):        if date in spy_data.index:            dates_filter.append(date)        else:            new_date = date - pd.Timedelta(days=1)            if new_date in spy_data.index:                dates_filter.append(new_date)            else:                new_date = date - pd.Timedelta(days=2)                if new_date in spy_data.index:                    dates_filter.append(new_date)                else:                    print(f"no date for {date}")    spy_data = spy_data[spy_data.index.isin(dates_filter)].copy()    spy_data.drop(columns=["Open", "High", "Low", "Close", "Volume"], inplace=True)    # standardize dates to that they all represent the end of the quarter    # e.g. "2013-06-28" -> "2013-06-30"    def to_quarter_end(date):        if date.month == 3:            return date.replace(day=31)        elif date.month == 6:            return date.replace(day=30)        elif date.month == 9:            return date.replace(day=30)        elif date.month == 12:            return date.replace(day=31)        else:            return date    new_index = pd.Series(spy_data.index).apply(to_quarter_end)    spy_data.set_index(new_index, inplace=True)    return spy_data@memoizedef get_benchmark_returns(periods):    adj_close = spy_data[spy_data.index.isin(periods)]    returns = adj_close["Adj Close"].pct_change().dropna()    return returnsspy_data = get_spy_data()print(f"Rows: {len(spy_data)}")spy_data.head()

Start date: 2013-09-27 00:00:00End date: 2023-10-03 00:00:00[*********************100%%**********************]  1 of 1 completedRows: 41

risk_free_rate = 0.02  # annualizeddef get_fund_performance_metrics(fund_cover_pages):    fund_cover_pages = fund_cover_pages.drop_duplicates(subset=["periodOfReport"])    fund_cover_pages = fund_cover_pages[        fund_cover_pages["holdingsValueStandardized"] > 0    ]    metrics = {}    if len(fund_cover_pages) < 1:        return None    last_row = fund_cover_pages.iloc[-1]    quarter = last_row["periodOfReport"].quarter    recent_year = last_row["periodOfReport"].year    metrics["assetValue"] = last_row["holdingsValueStandardized"].sum()    metrics["#holdings"] = last_row["numberHoldings"]    metrics["firstPeriod"] = fund_cover_pages.iloc[0]["periodOfReport"]    periods = fund_cover_pages["periodOfReport"].dt.date.unique().tolist()    metrics["#periods"] = len(periods)    if len(fund_cover_pages) <= 4:        return metrics    metrics["maxDrawdownQtrPct"] = round(        fund_cover_pages["holdingsValueStandardized"].pct_change().min() * 100, 2    )    row_5yr = fund_cover_pages[        (fund_cover_pages["periodOfReport"].dt.year == recent_year - 5)        & (fund_cover_pages["periodOfReport"].dt.quarter == quarter)    ]    row_3yr = fund_cover_pages[        (fund_cover_pages["periodOfReport"].dt.year == recent_year - 3)        & (fund_cover_pages["periodOfReport"].dt.quarter == quarter)    ]    row_1yr = fund_cover_pages[        (fund_cover_pages["periodOfReport"].dt.year == recent_year - 1)        & (fund_cover_pages["periodOfReport"].dt.quarter == quarter)    ]    quarterly_returns = (        fund_cover_pages["holdingsValueStandardized"].pct_change().dropna()    )    periods = fund_cover_pages["periodOfReport"].dt.date.unique().tolist()    benchmark_returns = get_benchmark_returns(periods)    if len(row_1yr) > 0:        if row_1yr["holdingsValueStandardized"].values[0] > 0:            pct_change_1yr = (                last_row["holdingsValueStandardized"]                - row_1yr["holdingsValueStandardized"].values[0]            ) / row_1yr["holdingsValueStandardized"].values[0]        else:            pct_change_1yr = 0        annualized_volatility = fund_cover_pages[            "holdingsValueStandardized"        ].pct_change().std() * np.sqrt(4)        beta = calculate_beta(quarterly_returns, benchmark_returns)        alpha = calculate_alpha(            quarterly_returns, benchmark_returns, risk_free_rate, beta, "Q"        )        sharpe_ratio = calculate_sharpe_ratio(quarterly_returns, risk_free_rate, "Q")        sortino_ratio = calculate_sortino_ratio(quarterly_returns, risk_free_rate, "Q")        t_stat_benchmark, p_value_benchmark = calculate_t_stat(            quarterly_returns, benchmark_returns        )        metrics["volYr"] = round(annualized_volatility, 2)        metrics["sharpe"] = sharpe_ratio        metrics["sortino"] = sortino_ratio        metrics["beta"] = beta        metrics["alpha"] = alpha        metrics["t-stat"] = round(t_stat_benchmark, 3)        metrics["p"] = round(p_value_benchmark, 3)        metrics["assets.pct_change_1yr"] = round(pct_change_1yr * 100, 2)    if len(row_3yr) > 0 and row_3yr["holdingsValueStandardized"].values[0] > 0:        pct_change_3yr = (            last_row["holdingsValueStandardized"]            - row_3yr["holdingsValueStandardized"].values[0]        ) / row_3yr["holdingsValueStandardized"].values[0]        metrics["assets.pct_change_3yr"] = round(pct_change_3yr * 100, 2)    else:        metrics["assets.pct_change_3yr"] = np.nan    if len(row_5yr) > 0 and row_5yr["holdingsValueStandardized"].values[0] > 0:        pct_change_5yr = (            last_row["holdingsValueStandardized"]            - row_5yr["holdingsValueStandardized"].values[0]        ) / row_5yr["holdingsValueStandardized"].values[0]        metrics["assets.pct_change_5yr"] = round(pct_change_5yr * 100, 2)    else:        metrics["assets.pct_change_5yr"] = np.nan    return metricsprint("Single fund performance metrics (CIK: 1759236)")get_fund_performance_metrics(cover_pages[cover_pages["filerCik"] == 1759236])

Single fund performance metrics (CIK: 1759236)

{'assetValue': 324876323, '#holdings': 111, 'firstPeriod': Timestamp('2019-03-31 00:00:00'), '#periods': 19, 'maxDrawdownQtrPct': -19.87, 'volYr': 0.29, 'sharpe': 0.932, 'sortino': 1.784, 'beta': 1.046, 'alpha': 0.147, 't-stat': 0.924, 'p': 0.362, 'assets.pct_change_1yr': 30.9, 'assets.pct_change_3yr': 82.12, 'assets.pct_change_5yr': nan}

risk_free_rate = 0.02  # annualizeddef get_performance_metrics(cover_pages):    performance_by_cik = {}    for cik in cover_pages["filerCik"].unique():        try:            metrics = get_fund_performance_metrics(                cover_pages[cover_pages["filerCik"] == cik]            )            if metrics is not None:                performance_by_cik[cik] = metrics        except Exception as e:            print(f"Error for cik {cik}: {e}")    performance_by_cik = pd.DataFrame.from_dict(        performance_by_cik, orient="index"    ).reset_index(names=["cik"])    return performance_by_cik

all_funds_metrics = get_performance_metrics(cover_pages)

active_funds_metrics = all_funds_metrics[all_funds_metrics["cik"].isin(active_ciks_wo_outliers)]

long_active_funds_mask = active_funds_metrics["firstPeriod"].dt.year <= 2018positive_sharpe_mask = active_funds_metrics["sharpe"] > 0positive_5yr_change_mask = active_funds_metrics["assets.pct_change_5yr"] > 0best_performing = active_funds_metrics[    long_active_funds_mask & positive_sharpe_mask & positive_5yr_change_mask].copy()best_performing.sort_values(by=["sharpe"], ascending=False, inplace=True)best_performing.reset_index(drop=True, inplace=True)print(f"Ranking of {len(best_performing):,} filtered funds active in 2023 ranked by their Sharpe ratio:")best_performing.head(10)

Ranking of 2,047 filtered funds active in 2023 ranked by their Sharpe ratio:

def get_quarter(period):    if period.month == 3:        return "Q1"    elif period.month == 6:        return "Q2"    elif period.month == 9:        return "Q3"    elif period.month == 12:        return "Q4"    else:        return perioddef format_period(period):    qtr = get_quarter(period)    if qtr == "Q3":        return qtr    if qtr != "Q1":        # return qtr        return ""    return str(period)[0:4] + " " + get_quarter(period)def plot_assets_over_time(cover_pages_of_cik, ax=None, show_y_axis_label=True, title=None):    cik = cover_pages_of_cik.iloc[0]["filerCik"]    if ax is None:        fig, ax = plt.subplots(figsize=(6, 3))    ax.plot(        cover_pages_of_cik["periodOfReport"],        cover_pages_of_cik["holdingsValueStandardized"],        color="black",    )    periods = cover_pages_of_cik["periodOfReport"].dt.date.unique().tolist()    benchmark = spy_data[spy_data.index.isin(periods)].copy()    # scale benchmark to same scale as holdingsValueStandardized    benchmark["value_positions"] = (        benchmark["Adj Close"] / benchmark["Adj Close"].iloc[0]    ) * cover_pages_of_cik["holdingsValueStandardized"].values[0]    ax.plot(        benchmark.index,        benchmark["value_positions"],        color="blue",        linestyle="--",        alpha=0.5,    )    xticks = cover_pages_of_cik["periodOfReport"].dt.date.unique().tolist()    ax.set_xticks(xticks)    ax.set_xticklabels(xticks, rotation=90, fontsize=6)    ax.yaxis.grid(True, linestyle="--", which="major", color="grey", alpha=0.25)    ax.spines["top"].set_visible(False)    ax.spines["right"].set_visible(False)    if show_y_axis_label:        ax.set_ylabel("Asset Value (USD, Millions)")    ax.get_yaxis().set_major_formatter(        mtick.FuncFormatter(lambda x, p: format(int(x) / 1000000, ",.0f"))    )    # ax.set_title(f"CIK {cik} - Holdings Value over Time")    ax.set_title(f"CIK {cik}" if title is None else title)    y_min = ax.get_ylim()[0]    for year in cover_pages_of_cik["periodOfReport"].dt.year.unique():        x_value = str(year) + "-03-31"        x_value = pd.to_datetime(x_value, format="%Y-%m-%d")        if (x_value in cover_pages_of_cik["periodOfReport"].values) == False:            continue        y_value = cover_pages_of_cik[cover_pages_of_cik["periodOfReport"] == x_value][            "holdingsValueStandardized"        ].values[0]        ax.vlines(            x=x_value,            ymin=y_min,            ymax=y_value,            color="grey",            alpha=0.25,            zorder=-100,            linestyle="--",            linewidth=0.8,        )    ax.axvspan(        pd.to_datetime("2020-03-31", format="%Y-%m-%d"),        pd.to_datetime("2021-12-31", format="%Y-%m-%d"),        alpha=0.1,        color="red",        zorder=-100,    )    ax.text(        pd.to_datetime("2020-12-31", format="%Y-%m-%d"),        ax.get_ylim()[1] * 0.95,        "COVID",        horizontalalignment="center",        verticalalignment="center",        color="red",        alpha=0.5,    )    pct_growth_5yr = 0    pct_growth_3yr = 0    pct_growth_1yr = 0    fund_metrics = all_funds_metrics[all_funds_metrics["cik"] == cik]    try:        pct_growth_5yr = "{:.2f}%".format(fund_metrics.iloc[0]["assets.pct_change_5yr"])    except:        pass    try:        pct_growth_3yr = "{:.2f}%".format(fund_metrics.iloc[0]["assets.pct_change_3yr"])    except:        pass    try:        pct_growth_1yr = "{:.2f}%".format(fund_metrics.iloc[0]["assets.pct_change_1yr"])    except:        pass    # add text to figure    # get x-anchor by getting first x value of xticks    x_anchor = xticks[0]    ax.text(        x_anchor,        ax.get_ylim()[1] * 0.90,        f"5 yr return: {pct_growth_5yr}3 yr return: {pct_growth_3yr}1 yr return: {pct_growth_1yr}",        horizontalalignment="left",        verticalalignment="top",        color="black",        alpha=0.8,    )    number_of_13F_filings = cover_pages_of_cik["accessionNo"].nunique()    caption = (        f"Figure: Quarterly Value of all Holdings of Fund {cik}. The figure shows the total dollar value of all holdings per quarterly reporting period (end of March, June, September and December). "        + f"A total of {format(number_of_13F_filings, ',')} 13F filings are included in the figure. "    )    caption = "".join(textwrap.wrap(caption, 70))    # replace x labels with year and quarter    ax.set_xticklabels(        [format_period(period) for period in xticks],        rotation=90,        ha="center",        fontsize=8,    )cik = best_performing.iloc[0]["cik"]print("CIK:", cik)plot_assets_over_time(cover_pages[cover_pages["filerCik"] == cik], title=f"Holding Values per Quarter (CIK: {cik})")

CIK: 1737109

fig, axs = plt.subplots(5, 3, figsize=(9, 12))for i in range(0, 15):    cik = best_performing.iloc[i]['cik']    row = i // 3    col = i % 3    show_y_axis_label = i % 3 == 0    plot_assets_over_time(cover_pages[cover_pages["filerCik"] == cik], ax=axs[row, col], show_y_axis_label=show_y_axis_label)fig.suptitle("Top 15 Investment Managers by Total Asset Growth", fontsize=12)plt.tight_layout()plt.show()

cik_filter = [1525947, 1801507, 1476534, 96223, 1532618, 1761013, 1781948, 1720777, 1820680, 714142]data_to_plot = all_funds_metrics[    (all_funds_metrics['alpha'].isna() == False)    & (all_funds_metrics["alpha"] < 3)    & (all_funds_metrics["alpha"] > -3)    & (all_funds_metrics["#periods"] > 5)    & (all_funds_metrics["#holdings"] > 5)    & (all_funds_metrics["assets.pct_change_3yr"].isna() == False)    & (all_funds_metrics["maxDrawdownQtrPct"] > -70)]data_to_plot.sort_values(by='alpha', ascending=False).head(10)

fig, ax = plt.subplots(figsize=(6, 3))x = data_to_plot["alpha"]ax.hist(x, bins=100, color="black", alpha=0.9)ax.set_yscale("log")ax.set_xlabel("Alpha")ax.set_ylabel("Number of Funds")ax.axhline(y=100, color="grey", alpha=0.5, linestyle="--", linewidth=0.8, zorder=-100)ax.axhline(y=10, color="grey", alpha=0.5, linestyle="--", linewidth=0.8, zorder=-100)ax.axvline(x=0, color="red", alpha=0.5, linestyle="--", linewidth=1.8, zorder=100)ax.spines["top"].set_visible(False)ax.spines["right"].set_visible(False)ax.set_title("Alpha Distribution of All Funds (2013 - 2023)")plt.tight_layout()plt.show()print("Alpha Distribution of All Funds (2013 - 2023) - Descriptive Stats")pd.DataFrame(data_to_plot["alpha"].describe().round(2))

Alpha Distribution of All Funds (2013 - 2023) - Descriptive Stats

	year	entering	exiting
16	2014	552	253
17	2015	564	219
18	2016	445	273
19	2017	497	357
20	2018	677	309
21	2019	618	295
22	2020	734	362
23	2021	949	354
24	2022	1175	334
25	2023	739	517

	years_active
count	13035.00
mean	7.22
std	6.53
min	1.00
25%	2.00
50%	5.00
75%	10.00
max	26.00

	filerCik	filedAt	periodOfReport	accessionNo	numberHoldings	holdingsValueReported	holdingsValueStandardized
1898	1040592	2013-10-01 09:47:12-04:00	2013-09-30	0001040592-13-000013	59	4121959	4121959000
2988	767684	2013-10-01 11:38:08-04:00	2013-09-30	0000767684-13-000012	122	881429	881429000
790	1452208	2013-10-01 12:08:50-04:00	2013-09-30	0001144204-13-053268	81	581943	581943000
1691	805676	2013-10-01 12:12:11-04:00	2013-09-30	0001144204-13-053271	541	1307390845	1307390845
1869	1426588	2013-10-01 16:57:06-04:00	2013-09-30	0001426588-13-000006	182	194373	194373000
...	...	...	...	...	...	...	...
88386	2004720	2023-12-21 18:22:18-05:00	2018-12-31	0001398344-23-023296	482	129656986	129656986
95270	2004720	2023-12-21 18:24:48-05:00	2019-06-30	0001398344-23-023298	469	160161167	160161167
90360	2004720	2023-12-21 18:26:06-05:00	2019-03-31	0001398344-23-023299	472	142784489	142784489
99088	2004720	2023-12-21 18:28:56-05:00	2019-09-30	0001398344-23-023302	487	167369301	167369301
105618	2004720	2023-12-21 18:29:23-05:00	2019-12-31	0001398344-23-023303	475	183817738	183817738

	periodOfReport	numberHoldings	holdingsValueReported	holdingsValueStandardized	outlier
157623	2022-03-31	143	132484	132484000	False
164326	2022-06-30	128225	137	137000	True
174902	2022-12-31	145	132717600	132717600	True
180237	2023-03-31	143	133881144	133881144	True
187673	2023-06-30	143	141582997	141582997	False
192217	2023-09-30	143	131914090	131914090	False

	Adj Close
Date
2013-09-30	139.467758
2013-12-31	154.146393
2014-03-31	156.771637
2014-06-30	164.859512
2014-09-30	166.730240

Analyze Form 13F Filings with Python - Fund Performance Monitoring & More

Trend Analysis of Active Funds

Fund Lifespan and Survival Rates

Evolution of Asset Values

Ranking Funds by Performance Factors

Outlier Detection

Performance Metrics Calculation & Ranking

Visualizing Fund Performance Relative to the Benchmark

Alpha Distribution Across Funds

Conclusion

	cik	formType	accessionNo	filedAt	year
0	1025421	13F-HR	0000732697-98-000004	1998-02-06 05:00:00+00:00	1998
1	937522	13F-HR	0000937522-98-000014	1998-02-11 05:00:00+00:00	1998
2	1044797	13F-HR	0000950144-98-001301	1998-02-12 05:00:00+00:00	1998
3	1001540	13F-HR	0000201196-98-000001	1998-02-13 05:00:00+00:00	1998
4	1026214	13F-HR	0000914121-98-000170	1998-02-17 05:00:00+00:00	1998
...	...	...	...	...	...
390142	2004720	13F-HR	0001398344-23-023298	2023-12-21 23:24:48+00:00	2023
390143	2004720	13F-HR	0001398344-23-023299	2023-12-21 23:26:06+00:00	2023
390144	2004720	13F-HR	0001398344-23-023302	2023-12-21 23:28:56+00:00	2023
390145	2004720	13F-HR	0001398344-23-023303	2023-12-21 23:29:23+00:00	2023
390146	1731134	13F-HR/A	0001214659-23-016755	2023-12-22 20:44:54+00:00	2023

	year	count
16	2014	3876
17	2015	4221
18	2016	4393
19	2017	4533
20	2018	4901
21	2019	5224
22	2020	5596
23	2021	6191
24	2022	7032
25	2023	7254

	cik	assetValue	#holdings	firstPeriod	#periods	maxDrawdownQtrPct	volYr	sharpe	sortino	beta	alpha	t-stat	p	assets.pct_change_1yr	assets.pct_change_3yr	assets.pct_change_5yr
0	1737109	5668882543	1557	2018-03-31	22	-8.66	0.31	2.181	12.280	1.163	0.530	3.403	0.002	81.11	249.45	1612.68
1	1698218	2227118451	702	2017-06-30	26	-8.98	0.18	2.048	5.372	0.580	0.307	2.497	0.016	18.26	150.13	463.99
2	838618	772954565	338	2018-03-31	23	-2.77	0.20	1.956	24.062	0.860	0.298	2.333	0.025	40.71	214.78	561.58
3	354497	6123311000	512	2018-03-31	22	-10.91	0.22	1.877	6.070	0.563	0.330	2.159	0.037	22.87	125.88	418.69
4	1731061	1016916977	1706	2018-03-31	23	-7.98	0.27	1.839	6.757	0.623	0.422	2.663	0.011	44.65	317.55	1062.82
5	1540235	74941949688	4350	2013-09-30	41	-15.36	0.15	1.836	2.657	0.858	0.180	2.435	0.017	29.61	95.65	178.02
6	1744073	2070307627	297	2017-03-31	25	-13.82	0.21	1.829	5.238	0.911	0.272	2.200	0.033	9.05	129.71	326.88
7	1482880	6702866033	798	2013-09-30	41	-14.24	0.22	1.773	4.059	0.566	0.317	3.247	0.002	108.81	358.45	1103.50
8	1141802	76903880909	3266	2013-09-30	41	-17.98	0.23	1.722	3.406	0.680	0.319	3.269	0.002	34.15	106.95	288.03
9	1721608	1646575000	82	2017-12-31	24	-11.27	0.25	1.704	4.870	0.572	0.365	2.425	0.019	38.92	97.45	425.19

	cik	assetValue	#holdings	firstPeriod	#periods	maxDrawdownQtrPct	volYr	sharpe	sortino	beta	alpha	t-stat	p	assets.pct_change_1yr	assets.pct_change_3yr	assets.pct_change_5yr
6075	1761013	25903856830	1821	2019-03-31	19	-67.95	7.16	0.491	7.630	6.704	2.685	0.979	0.335	227.48	383.91	NaN
4288	1633446	439415641	185	2015-03-31	35	-26.17	6.54	0.381	12.867	-0.427	2.497	1.051	0.297	5.97	36.95	115.05
5616	1728319	1188655240	537	2018-03-31	22	-20.23	5.54	0.482	20.686	1.065	2.489	1.031	0.309	8.08	92.43	123.49
6470	1780985	384133000	273	2019-09-30	16	-54.05	3.73	0.680	7.846	0.876	2.344	1.208	0.237	52.44	100.44	NaN
1945	1304762	1688600000	82	2013-09-30	14	-4.76	5.01	0.601	75.493	7.004	2.241	1.007	0.324	1030.22	1195.71	NaN
6311	1767640	36476308819	52	2019-03-31	16	-48.19	2.09	0.847	6.114	-3.403	2.171	1.451	0.158	-0.92	417.45	NaN
6521	1766904	1860030585	3045	2019-12-31	16	-53.21	3.11	0.754	4.668	1.159	2.154	1.347	0.189	53.16	293.46	NaN
5725	1722053	484195379	311	2018-03-31	22	-58.37	4.08	0.523	6.372	0.792	1.992	1.105	0.276	13.89	-73.27	220.43
2175	1364742	3627977394535	49732	2013-09-30	39	-21.61	8.33	0.341	21.199	7.700	1.938	0.995	0.323	23.39	33.88	56.98
2668	1420473	38052867090	277	2013-09-30	41	-15.57	4.82	0.401	19.717	0.114	1.897	1.184	0.240	24.10	51.59	625.65

	alpha
count	4022.00
mean	0.04
std	0.26
min	-0.85
25%	-0.08
50%	0.00
75%	0.11
max	2.68