%matplotlib inline

from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import yfinance as yf
from scipy.optimize import minimize

# Import core functions from project modules (ensure the project is in PYTHONPATH or use relative imports)
from app.config.config import (
    HISTORICAL_PERIOD_DAYS, BENCHMARK_TICKER,
    PORTFOLIO_EVOLUTION_YEARS, TARGET_MARKET_BETA,
    NUMBER_OF_PORTFOLIOS)
from app.visualization.visuals import (
    create_pie_chart, plot_historical_prices, plot_daily_returns,
    plot_portfolio_returns, plot_portfolio_evolution, plot_sharpe_ratio_scatter,
    plot_efficient_frontier
)
from app.analysis.portfolio_analyzer import PortfolioAnalyzer

# Echo the imported configuration values so the run documents its own settings.
for _label, _value in (
    ("HISTORICAL_PERIOD_DAYS", HISTORICAL_PERIOD_DAYS),
    ("BENCHMARK_TICKER", BENCHMARK_TICKER),
    ("PORTFOLIO_EVOLUTION_YEARS", PORTFOLIO_EVOLUTION_YEARS),
    ("TARGET_MARKET_BETA", TARGET_MARKET_BETA),
    ("NUMBER_OF_PORTFOLIOS", NUMBER_OF_PORTFOLIOS),
):
    print(_label, _value)

HISTORICAL_PERIOD_DAYS 365
BENCHMARK_TICKER ^GSPC
PORTFOLIO_EVOLUTION_YEARS 3
TARGET_MARKET_BETA 1
NUMBER_OF_PORTFOLIOS 10000

# Example user portfolio used for the rest of the walkthrough.
amount = 10_000  # total capital to invest

# Allocation per ticker, stored as fractions (0.4 == 40%).
ticker_percentage = {
    'AAPL': 0.4,
    'MSFT': 0.3,
    'GOOGL': 0.3,
}

# Parallel lists in the dict's insertion order: tickers[i] is paired with weights[i].
tickers = [*ticker_percentage]
weights = [*ticker_percentage.values()]
num_tickers = len(ticker_percentage)

print(f" Analyzing portfolio with {num_tickers} assets:")
for ticker, weight in zip(tickers, weights):
    print(f"   {ticker}: {weight:.1%}")

 Analyzing portfolio with 3 assets:
   AAPL: 40.0%
   MSFT: 30.0%
   GOOGL: 30.0%

# Analysis window: the HISTORICAL_PERIOD_DAYS calendar days that end today.
_lookback = timedelta(days=HISTORICAL_PERIOD_DAYS)
end_date = datetime.today().date()
start_date = end_date - _lookback
print(f" Analysis Period: {start_date} to {end_date}")
print(f" Total Days: {HISTORICAL_PERIOD_DAYS} days (~{HISTORICAL_PERIOD_DAYS/365:.1f} years)")

 Analysis Period: 2024-07-15 to 2025-07-15
 Total Days: 365 days (~1.0 years)

def get_historical_prices(tickers, start_date, end_date):
    """
    Fetch historical adjusted closing prices for the given tickers.

    The download is requested with ``auto_adjust=False`` so the result keeps
    a separate 'Adj Close' field, which is the only field returned here.

    Args:
        tickers: A single ticker symbol or a list of ticker symbols.
        start_date: Start of the download window (forwarded to yfinance).
        end_date: End of the download window (forwarded to yfinance).

    Returns:
        A DataFrame of adjusted closes with one column per ticker, or None
        (after printing the reason) when the download is empty or has no
        'Adj Close' data. For multi-ticker downloads the columns follow the
        order of ``tickers`` so that weights supplied as a plain list line up
        with the columns by position.
    """
    data = yf.download(tickers, start=start_date, end=end_date, group_by='ticker', auto_adjust=False)

    if data.empty:
        print("❌ Yahoo Finance returned empty data. Please check ticker symbols and try again.")
        return None

    # Handle MultiIndex data (ticker on level 0, price field on level 1)
    if isinstance(data.columns, pd.MultiIndex):
        try:
            adj_close = data.xs('Adj Close', level=1, axis=1)
        except KeyError:
            print("❌ No 'Adj Close' data found in MultiIndex columns.")
            return None

        # The download does not guarantee the requested column order, and
        # callers multiply these columns by list weights positionally, so
        # restore the caller's order. Columns that were not requested
        # verbatim keep their relative order at the end.
        requested = [tickers] if isinstance(tickers, str) else list(tickers)
        ordered = [t for t in dict.fromkeys(requested) if t in adj_close.columns]
        remaining = [c for c in adj_close.columns if c not in ordered]
        return adj_close[ordered + remaining]

    # Handle flat DataFrame (single ticker)
    if 'Adj Close' in data.columns:
        return pd.DataFrame(data['Adj Close'])

    print("❌ Unexpected data format from Yahoo Finance.")
    return None

prices = get_historical_prices(tickers, start_date, end_date)
if prices is None:
    # get_historical_prices already printed the reason; stop here with a
    # clear error instead of failing later on len(None).
    raise RuntimeError("Historical price download failed; cannot continue the analysis.")
print(f" Successfully loaded {len(prices)} days of price data")
print(f" Data shape: {prices.shape}")
prices.head()

[*********************100%***********************]  3 of 3 completed

 Successfully loaded 250 days of price data
 Data shape: (250, 3)

# Pass the adjusted-close table to the project's price-plotting helper.
plot_historical_prices(prices)

def get_daily_returns(price):
    """
    Turn a table of prices into simple period-over-period returns.

    Each entry is (price_t / price_{t-1}) - 1, as computed by pandas'
    ``pct_change`` with its default settings. The first row has no
    predecessor and is therefore NaN.

    Args:
        price: pandas object of prices, rows ordered in time.

    Returns:
        Object of the same shape holding the fractional change per row.
    """
    return price.pct_change()

# Derive per-asset daily returns from the price table; the bare expression
# below displays the first rows when run as a notebook cell.
daily_returns = get_daily_returns(prices)
daily_returns.head()

# Pass the returns table to the project's plotting helper.
plot_daily_returns(daily_returns)

def get_portfolio_returns(weights, daily_returns):
    """
    Combine per-asset returns into one portfolio return per row.

    The result is the weighted sum over columns, i.e. for every date
    sum_i(weight_i * return_i), computed with ``DataFrame.dot``.

    Args:
        weights: One weight per column of ``daily_returns``. A plain list is
            matched to the columns by position, so its order must follow the
            column order.
        daily_returns: DataFrame of asset returns, one column per asset.

    Returns:
        Series of portfolio returns indexed like ``daily_returns``; rows that
        contain NaN (such as the first row of a pct_change table) stay NaN.
    """
    return daily_returns.dot(weights)

# Daily return of the user's allocation.
# NOTE(review): `weights` is a plain list in `tickers` order and is applied to
# the columns of `daily_returns` by position — confirm the column order matches.
port_daily_return = get_portfolio_returns(weights, daily_returns)
port_daily_return.head()

Date
2024-07-09         NaN
2024-07-10    0.014678
2024-07-11   -0.026135
2024-07-12    0.002079
2024-07-15    0.008450
dtype: float64

# Pass the portfolio's daily return series to the project's plotting helper.
plot_portfolio_returns(port_daily_return)

def get_benchmark_data(start_date, end_date):
    """
    Return the benchmark's daily return series for the given window.

    Prices for BENCHMARK_TICKER are fetched with get_historical_prices and
    converted with get_daily_returns; the first column of the resulting
    table is returned as a Series.

    Args:
        start_date: Start of the window, forwarded to get_historical_prices.
        end_date: End of the window, forwarded to get_historical_prices.

    Returns:
        Series of benchmark daily returns, or None (after printing a message)
        when prices could not be fetched or the returns table is missing or
        empty.
    """
    index_prices = get_historical_prices(BENCHMARK_TICKER, start_date, end_date)
    if index_prices is None:
        print("Benchmark prices could not be retrieved.")
        return None

    index_returns = get_daily_returns(index_prices)
    if index_returns is not None and not index_returns.empty:
        # Single benchmark ticker: its returns live in the first column.
        return index_returns.iloc[:, 0]

    print("Benchmark daily returns could not be computed.")
    return None

def calculate_beta(daily_returns, benchmark_returns):
    """
    Calculate the beta coefficient of each asset against a benchmark.

    beta_i = Cov(asset_i, benchmark) / Var(benchmark)

    Interpretation:
    - beta = 1.0: the asset tends to move one-for-one with the benchmark
    - beta > 1.0: the asset amplifies benchmark moves (beta = 1.5 -> about 1.5x)
    - beta < 1.0: the asset dampens benchmark moves

    Args:
        daily_returns: DataFrame of asset returns, one column per asset.
        benchmark_returns: Series of benchmark returns; pandas aligns it with
            each asset column on the index when computing the covariance.

    Returns:
        Series named 'Beta', indexed by the column labels of ``daily_returns``.

    Raises:
        ValueError: If ``benchmark_returns`` is None (for example because the
            benchmark download failed upstream).
    """
    if benchmark_returns is None:
        raise ValueError("benchmark_returns is required to compute beta")

    # The benchmark variance is the same for every asset, so compute it once.
    variance = benchmark_returns.var()

    beta_by_ticker = {
        ticker: daily_returns[ticker].cov(benchmark_returns) / variance
        for ticker in daily_returns
    }
    return pd.Series(beta_by_ticker, name='Beta')

# Benchmark returns over the same window, then one beta per asset column.
# get_benchmark_data returns None on failure, which calculate_beta cannot use.
benchmark_daily_returns = get_benchmark_data(start_date, end_date)
betas = calculate_beta(daily_returns, benchmark_daily_returns)
betas

[*********************100%***********************]  1 of 1 completed

GOOGL    1.052903
AAPL     1.236280
MSFT     0.969317
Name: Beta, dtype: float64

def calculate_beta_weights(data, target_market_beta=None):
    """
    Derive portfolio weights from asset betas and a target market beta.

    For every asset i the raw weight is

        (target_beta - beta_i) / (sum(betas) - beta_i)

    and the raw weights are then divided by their sum so the result adds up
    to 1.

    NOTE(review): raw weights are negative for assets whose beta exceeds the
    target, and the normalising sum can itself be negative, which flips every
    sign. The result can therefore contain negative (short) weights and can
    give the largest weight to the highest-beta asset — confirm this is the
    intended behaviour.

    Args:
        data: Series of betas, or a DataFrame with a 'Beta' column, indexed
            by asset label. A Series is accepted whatever its name is.
        target_market_beta: Target beta; defaults to the configured
            TARGET_MARKET_BETA when omitted.

    Returns:
        Series named 'Weight', indexed like ``data``, summing to 1.
    """
    if target_market_beta is None:
        target_market_beta = TARGET_MARKET_BETA

    # Take the Series values directly: pd.DataFrame(series, columns=['Beta'])
    # would produce an all-NaN column for a Series not already named 'Beta'.
    betas = data if isinstance(data, pd.Series) else data['Beta']

    raw_weights = (target_market_beta - betas) / (betas.sum() - betas)
    normalized = raw_weights / raw_weights.sum()

    return normalized.rename('Weight').rename_axis(None)

# Beta-derived weights (a Series indexed like `betas`); the bare name displays
# them in a notebook. The captured output shows these can be negative.
beta_weight = calculate_beta_weights(betas)
beta_weight

GOOGL    0.188235
AAPL     0.916952
MSFT    -0.105187
Name: Weight, dtype: float64

def calculate_risk_parity_weights(returns):
    """Volatility-based risk parity: weight each asset inversely to its volatility.

    Only each asset's own standard deviation is used; correlations between
    assets do not enter the calculation.

    Args:
        returns: DataFrame of asset returns, one column per asset.

    Returns:
        Series of weights indexed by the columns of ``returns``, summing to 1.
    """
    # Per-asset volatility = standard deviation of each column.
    vol = returns.std(axis=0)

    # Share of the summed volatility attributable to each asset.
    vol_share = vol / vol.sum()

    # Equal risk budget per asset.
    equal_budget = 1 / len(vol)

    # Budget divided by share: a larger volatility share gives a smaller weight.
    unscaled = equal_budget / vol_share

    # Rescale so the weights add up to 1.
    return unscaled / unscaled.sum()

# Volatility-based weights, indexed by the columns of `daily_returns`.
risk_parity_weights = calculate_risk_parity_weights(daily_returns)
risk_parity_weights

Ticker
GOOGL    0.307163
AAPL     0.306117
MSFT     0.386721
dtype: float64

# NOTE(review): `risk_parity_weights` follows the column order of the returns
# table while `tickers` follows the user's dict; if the chart helper pairs them
# by position the slice labels may be mismatched — confirm.
create_pie_chart(risk_parity_weights, tickers, 'Risk Parity Portfolio')

# One analyzer instance is reused for every strategy below.
analyzer = PortfolioAnalyzer(tickers)

# analyze_strategy hands back two values: one is plotted as the portfolio's
# evolution and the other is formatted as a percentage total return.
user_value, user_return = analyzer.analyze_strategy('User', weights, PORTFOLIO_EVOLUTION_YEARS)
plot_portfolio_evolution(user_value, "User Allocation Evolution")
print(f"Total Return: {user_return:.2%}")

[*********************100%***********************]  3 of 3 completed
/Users/chris/Documents/GitHub/columbia_fintech_bootcamp/projects/RoboPort-main/app/analysis/portfolio_analyzer.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  total_return = (portfolio_value['Profit Close'][-1]/portfolio_value['Profit Close'][0])-1

Total Return: 68.90%

# Same evaluation as the user allocation, using the volatility-based weights.
# NOTE(review): the weights are a Series in returns-column order; confirm how
# the analyzer matches weights to tickers.
rp_value, rp_return = analyzer.analyze_strategy('Risk Parity', risk_parity_weights, PORTFOLIO_EVOLUTION_YEARS)
plot_portfolio_evolution(rp_value, "Risk Parity Evolution")
print(f"Total Return: {rp_return:.2%}")

[*********************100%***********************]  3 of 3 completed
/Users/chris/Documents/GitHub/columbia_fintech_bootcamp/projects/RoboPort-main/app/analysis/portfolio_analyzer.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  total_return = (portfolio_value['Profit Close'][-1]/portfolio_value['Profit Close'][0])-1

Total Return: 73.39%

# Same evaluation, using the beta-derived weights (which may include negative
# entries, as the captured output of `beta_weight` shows).
beta_value, beta_return = analyzer.analyze_strategy('Beta', beta_weight, PORTFOLIO_EVOLUTION_YEARS)
plot_portfolio_evolution(beta_value, "Beta Weights Evolution")
print(f"Total Return: {beta_return:.2%}")

[*********************100%***********************]  3 of 3 completed
/Users/chris/Documents/GitHub/columbia_fintech_bootcamp/projects/RoboPort-main/app/analysis/portfolio_analyzer.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  total_return = (portfolio_value['Profit Close'][-1]/portfolio_value['Profit Close'][0])-1

Total Return: 39.98%

def calculate_sharpe_ratio_optimization(prices, num_tickers, num_portfolios=None):
    """Search random long-only portfolios for the highest Sharpe ratio.

    Daily log returns are computed from ``prices``; random weight vectors
    (non-negative, each summing to 1) are then scored by

        Sharpe = mean daily log return / daily volatility

    with no risk-free rate and no annualisation. Randomness comes from the
    global NumPy generator, so seed it beforehand for reproducible results.

    Args:
        prices: DataFrame of prices, one column per asset; rows containing
            NaN after taking price ratios are dropped.
        num_tickers: Number of assets; must equal the number of columns in
            ``prices``.
        num_portfolios: How many random portfolios to draw. Defaults to the
            configured NUMBER_OF_PORTFOLIOS when omitted.

    Returns:
        dict with:
            'test_volatility': array of portfolio volatilities,
            'test_return': array of portfolio mean log returns,
            'sharpratio': array of return / volatility ratios,
            'max_sharpratio': index of the best ratio in the arrays above,
            'sharpratio_weight': weight vector of that best portfolio,
            'meanlog': Series of mean daily log returns per asset,
            'sigma': DataFrame covariance matrix of daily log returns.
    """
    # Step 1: daily log returns from consecutive price ratios.
    price_relatives = (prices / prices.shift(1)).dropna()
    logreturns = np.log(price_relatives)

    # Step 2: expected returns and covariance matrix.
    meanlog = logreturns.mean()
    sigma = logreturns.cov()

    # Step 3: draw every random weight vector at once and normalise each row to 1.
    no_porfolio = NUMBER_OF_PORTFOLIOS if num_portfolios is None else num_portfolios
    raw_weights = np.random.random((no_porfolio, num_tickers))
    test_weight = raw_weights / raw_weights.sum(axis=1, keepdims=True)

    # Step 4: score all portfolios with matrix operations instead of a Python loop.
    mean_vec = meanlog.to_numpy()
    cov_mat = sigma.to_numpy()
    test_return = test_weight @ mean_vec
    # Row-wise quadratic form w^T * Sigma * w, then square root.
    test_volatility = np.sqrt(((test_weight @ cov_mat) * test_weight).sum(axis=1))
    sharpratio = test_return / test_volatility

    # Step 5: pick the portfolio with the maximum Sharpe ratio.
    max_sharpratio = sharpratio.argmax()
    sharpratio_weight = test_weight[max_sharpratio, :]

    return {
        'test_volatility': test_volatility,
        'test_return': test_return,
        'sharpratio': sharpratio,
        'max_sharpratio': max_sharpratio,
        'sharpratio_weight': sharpratio_weight,
        'meanlog': meanlog,
        'sigma': sigma
    }

# Run the random-portfolio search, then pass the simulated volatility / return /
# ratio arrays and the index of the best portfolio to the scatter-plot helper.
sharpe_data = calculate_sharpe_ratio_optimization(prices, num_tickers)
plot_sharpe_ratio_scatter(
    sharpe_data['test_volatility'], 
    sharpe_data['test_return'], 
    sharpe_data['sharpratio'], 
    sharpe_data['max_sharpratio']
)

# NOTE(review): these weights are ordered like the columns of `prices`;
# confirm that this matches the ticker order the analyzer expects.
sr_value, sr_return = analyzer.analyze_strategy('Sharp Ratio', sharpe_data['sharpratio_weight'], PORTFOLIO_EVOLUTION_YEARS)
plot_portfolio_evolution(sr_value, "Sharpe Ratio Evolution")
print(f"Total Return: {sr_return:.2%}")

[*********************100%***********************]  3 of 3 completed
/Users/chris/Documents/GitHub/columbia_fintech_bootcamp/projects/RoboPort-main/app/analysis/portfolio_analyzer.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  total_return = (portfolio_value['Profit Close'][-1]/portfolio_value['Profit Close'][0])-1

Total Return: 48.21%

def calculate_markowitz_optimization(meanlog, sigma, num_tickers, test_return):
    """Compute the max-Sharpe long-only portfolio and a minimum-volatility frontier.

    Both problems are solved with SciPy's SLSQP under the constraints
    0 <= w_i <= 1 and sum(w) == 1, starting from equal weights.

    Args:
        meanlog: Expected return per asset (length ``num_tickers``).
        sigma: Covariance matrix of asset returns
            (``num_tickers`` x ``num_tickers``), ordered like ``meanlog``.
        num_tickers: Number of assets.
        test_return: Iterable of simulated portfolio returns; its maximum is
            the upper end of the frontier's target-return grid.

    Returns:
        dict with:
            'optimal_weight': the SciPy OptimizeResult of the max-Sharpe
                problem (weights are in its ``.x`` attribute),
            'returns': 50 evenly spaced target returns from 0 to
                ``max(test_return)``,
            'optimal_volatility': list with the minimised volatility reported
                by the solver for each target return. The solver's success
                flag is not checked, so entries for targets that are
                unreachable under the constraints are whatever it stopped at.
    """
    # Convert once to plain arrays so the objective functions stay cheap.
    mean_vec = np.asarray(meanlog, dtype=float)
    cov_mat = np.asarray(sigma, dtype=float)

    def getreturn(w):
        return np.sum(mean_vec * np.asarray(w, dtype=float))

    def minmizevolatility(w):
        w = np.asarray(w, dtype=float)
        return np.sqrt(np.dot(w, np.dot(cov_mat, w)))

    def negative_sharpratio(w):
        # minimize() minimises, so negate the ratio to maximise it.
        return -(getreturn(w) / minmizevolatility(w))

    def checksumtoone(w):
        return np.sum(w) - 1

    w_0 = [1 / num_tickers for _ in range(num_tickers)]
    bounds = [(0, 1) for _ in range(num_tickers)]
    budget_constraint = {'type': 'eq', 'fun': checksumtoone}

    optimal_weight = minimize(
        negative_sharpratio, w_0, method='SLSQP',
        bounds=bounds, constraints=(budget_constraint,),
    )

    # Efficient frontier: minimise volatility for each target return.
    returns = np.linspace(0, max(test_return), 50)
    optimal_volatility = []

    for r in returns:
        # Bind r as a default argument so the constraint keeps this
        # iteration's target even if it is evaluated later.
        return_constraint = {
            'type': 'eq',
            'fun': lambda w, target=r: getreturn(w) - target,
        }
        optimal = minimize(
            minmizevolatility, w_0, method='SLSQP',
            bounds=bounds, constraints=(budget_constraint, return_constraint),
        )
        optimal_volatility.append(optimal['fun'])

    return {
        'optimal_weight': optimal_weight,
        'returns': returns,
        'optimal_volatility': optimal_volatility
    }

# Reuse the mean vector, covariance matrix and simulated returns from the
# random-portfolio search as inputs to the constrained optimisation.
markowitz_data = calculate_markowitz_optimization(
    sharpe_data['meanlog'], 
    sharpe_data['sigma'], 
    num_tickers, 
    sharpe_data['test_return']
)
# Pass the simulated portfolios and the frontier points to the plotting helper.
plot_efficient_frontier(
    sharpe_data['test_volatility'],
    sharpe_data['test_return'],
    sharpe_data['sharpratio'],
    sharpe_data['max_sharpratio'],
    markowitz_data['optimal_volatility'],
    markowitz_data['returns']
)

# `.x` is the weight vector of the optimiser result.
# NOTE(review): it is ordered like `meanlog` / `sigma` (the columns of
# `prices`); confirm that this matches the analyzer's ticker order.
markowitz_value, markowitz_return = analyzer.analyze_strategy('Markowitz', markowitz_data['optimal_weight'].x, PORTFOLIO_EVOLUTION_YEARS)
plot_portfolio_evolution(markowitz_value, "Markowitz Evolution")
print(f"Total Return: {markowitz_return:.2%}")

[*********************100%***********************]  3 of 3 completed
/Users/chris/Documents/GitHub/columbia_fintech_bootcamp/projects/RoboPort-main/app/analysis/portfolio_analyzer.py:17: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  total_return = (portfolio_value['Profit Close'][-1]/portfolio_value['Profit Close'][0])-1

Total Return: 92.51%

# Ask the analyzer which of the strategies evaluated above it ranks best;
# it returns the strategy's name and the return printed below.
best_strategy, best_return = analyzer.get_best_strategy()
print(f"Best Strategy: {best_strategy} with return {best_return:.2%}")

Best Strategy: Markowitz with return 92.51%

# Display the pie chart and recommendation table for the winning strategy.
# The keys are the strategy names passed to analyzer.analyze_strategy above.
_weights_by_strategy = {
    'Risk Parity': risk_parity_weights,
    'Markowitz': markowitz_data['optimal_weight'].x,
    'User': weights,
    'Beta': beta_weight,
    'Sharp Ratio': sharpe_data['sharpratio_weight'],
}

if best_strategy not in _weights_by_strategy:
    # Fail with a clear message instead of leaving `df` undefined or stale.
    raise ValueError(f"Unknown strategy returned by analyzer: {best_strategy!r}")

_best_weights = _weights_by_strategy[best_strategy]
create_pie_chart(_best_weights, tickers)
df = analyzer.create_recommendation_dataframe(best_strategy, _best_weights)

df

Ticker	GOOGL	MSFT	AAPL
Date
2024-07-15	185.630783	450.505981	233.308884
2024-07-16	183.033371	446.099762	233.726944
2024-07-17	180.147354	440.145416	227.814575
2024-07-18	176.833420	437.019409	223.136459
2024-07-19	176.803558	433.784180	223.265839

	Key	Value
0	AAPL	2.792905e-16
1	MSFT	3.108562e-16
2	GOOGL	1.000000e+00

Portfolio Analysis with Modern Portfolio Theory: 5 Strategies

Project Overview: Portfolio Optimization with MPT

Project Overview

Step 1: Define Portfolio Inputs

What We're Doing Here

Key Concepts

Step 2: Set the Analysis Date Range

Historical Analysis Period

Important Note

Step 3: Load Historical Price Data

Data Collection Process

Technical Implementation

Step 4: Visualize Historical Prices

Price Trend Visualization

What to Look For

Step 5: Calculate Daily Returns

From Prices to Returns

The Math

Step 6: Portfolio Daily Returns

Creating a Composite Portfolio

The Mathematics

Why This Matters

Step 8: Beta-Weighted Strategy

Strategy Philosophy

The Algorithm

Strategic Logic

Advantages

Limitations

Step 9: Risk Parity Weights

What is Risk Parity?

Key Concepts:

Mathematical Foundation:

Benefits:

Example:

Step 10: Portfolio Strategy Analysis Framework

What is the PortfolioAnalyzer?

Key Functions:

How It Works:

Why This Matters:

Analyze User Strategy

Analyze Risk Parity Strategy

Analyze Beta Weight Strategy

Step 11: Sharpe Ratio Optimization

What is the Sharpe Ratio?

Why Optimize for Sharpe Ratio?

Monte Carlo Simulation Approach:

Mathematical Process:

Expected Outcome:

Understanding the Sharpe Ratio Scatter Plot

Understanding the Efficient Frontier Plot

Analyzing Markowitz Portfolio Performance

Step 13: Final Strategy Recommendation

Comprehensive Strategy Evaluation Framework

Strategies Being Compared

Evaluation Methodology

Why Total Return as the Primary Metric?

Strategic Allocation Insights

Interpreting the Recommendation

Understanding the Final Allocation

Conclusion

Previous Post: Smart Options: Navigating the High-Stakes World of Retail Options Trading | Part 1

Next Post: How I Turned Years of Scattered SQL Files Into a Complete SQL Mastery System Using ChatGPT