In [ ]:
Copied!
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import traceback # For detailed error printing
from PMF_toolkits.core import PMF
from PMF_toolkits.readers import XlsxReader, MultisitesReader # Import readers
from PMF_toolkits.analysis import compute_similarity_metrics
# Suppress warnings (optional)
import warnings
warnings.filterwarnings('ignore')
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import traceback # For detailed error printing
from PMF_toolkits.core import PMF
from PMF_toolkits.readers import XlsxReader, MultisitesReader # Import readers
from PMF_toolkits.analysis import compute_similarity_metrics
# Suppress warnings (optional)
import warnings
warnings.filterwarnings('ignore')
1.1 Configure data directories¶
Let's set up paths for both single-site and multi-site data.
In [ ]:
Copied!
# Define data directories (adjust paths if necessary)
single_site_data_dir = "single_site" # Path to single site PMF output files
multi_site_data_dir = "multi_site" # Path to multi-site PMF output files
# Define site names
single_site_name = "GRE-fr" # Single site name
multi_site_name = "11fnew" # Multi-site name (prefix for multi-site files)
# Define data directories (adjust paths if necessary)
single_site_data_dir = "single_site" # Path to single site PMF output files
multi_site_data_dir = "multi_site" # Path to multi-site PMF output files
# Define site names
single_site_name = "GRE-fr" # Single site name
multi_site_name = "11fnew" # Multi-site name (prefix for multi-site files)
1.2 Quick check: Load Multi-site Data¶
Attempt to load the multi-site data first to verify the reader corrections.
In [ ]:
Copied!
# Attempt to load multi-site data
print(f"--- Loading Multi-site Data ({multi_site_name}) ---")
pmf_multi = None # Initialize
multi_loaded = False
try:
# Use multisites=True flag
pmf_multi = PMF(site=multi_site_name, reader="xlsx", BDIR=multi_site_data_dir, multisites=True)
# Load all data components using read_all()
print("\nLoading all data components using read_all()...")
pmf_multi.read.read_all()
print("\nFinished attempting read_all().")
# Verify data loading status
multi_loaded = pmf_multi.ensure_data_loaded()
print(f"\nData loaded check result: {multi_loaded}")
except Exception as main_e:
print(f"\n Major error during multi-site loading: {str(main_e)}")
traceback.print_exc()
# Display status of key dataframes
print("\n--- Multi-site Data Status ---")
if pmf_multi is not None:
for attr in ['dfprofiles_b', 'dfprofiles_c', 'dfcontrib_b', 'dfcontrib_c',
'dfBS_profile_b', 'dfBS_profile_c', 'dfbootstrap_mapping_b', 'dfbootstrap_mapping_c',
'df_uncertainties_summary_b', 'df_uncertainties_summary_c',
'df_disp_swap_b', 'df_disp_swap_c']:
is_present = hasattr(pmf_multi, attr) and getattr(pmf_multi, attr) is not None
is_empty = getattr(pmf_multi, attr).empty if is_present else True
shape = getattr(pmf_multi, attr).shape if is_present and not is_empty else None
status = f"Present with shape {shape}" if is_present and not is_empty else ("Present but Empty" if is_present else "Missing")
print(f"- {attr}: {status}")
else:
print("PMF object 'pmf_multi' could not be initialized or loading failed severely.")
# Attempt to load multi-site data
print(f"--- Loading Multi-site Data ({multi_site_name}) ---")
pmf_multi = None # Initialize
multi_loaded = False
try:
# Use multisites=True flag
pmf_multi = PMF(site=multi_site_name, reader="xlsx", BDIR=multi_site_data_dir, multisites=True)
# Load all data components using read_all()
print("\nLoading all data components using read_all()...")
pmf_multi.read.read_all()
print("\nFinished attempting read_all().")
# Verify data loading status
multi_loaded = pmf_multi.ensure_data_loaded()
print(f"\nData loaded check result: {multi_loaded}")
except Exception as main_e:
print(f"\n Major error during multi-site loading: {str(main_e)}")
traceback.print_exc()
# Display status of key dataframes
print("\n--- Multi-site Data Status ---")
if pmf_multi is not None:
for attr in ['dfprofiles_b', 'dfprofiles_c', 'dfcontrib_b', 'dfcontrib_c',
'dfBS_profile_b', 'dfBS_profile_c', 'dfbootstrap_mapping_b', 'dfbootstrap_mapping_c',
'df_uncertainties_summary_b', 'df_uncertainties_summary_c',
'df_disp_swap_b', 'df_disp_swap_c']:
is_present = hasattr(pmf_multi, attr) and getattr(pmf_multi, attr) is not None
is_empty = getattr(pmf_multi, attr).empty if is_present else True
shape = getattr(pmf_multi, attr).shape if is_present and not is_empty else None
status = f"Present with shape {shape}" if is_present and not is_empty else ("Present but Empty" if is_present else "Missing")
print(f"- {attr}: {status}")
else:
print("PMF object 'pmf_multi' could not be initialized or loading failed severely.")
2. Single Site Analysis¶
2.1 Load single site data¶
In [ ]:
Copied!
# Create PMF object for single site
print(f"\n--- Loading Single-site Data ({single_site_name}) ---")
pmf_single = None
single_loaded = False
try:
# Explicitly set multisites=False
pmf_single = PMF(site=single_site_name, reader="xlsx", BDIR=single_site_data_dir, multisites=False)
# Load all data
print("Loading all data components using read_all()...")
pmf_single.read.read_all()
print("\nFinished attempting read_all().")
# Verify data was loaded successfully
single_loaded = pmf_single.ensure_data_loaded()
print(f"\nData loaded check result: {single_loaded}")
except Exception as e:
print(f"\n Error loading single-site data: {e}")
traceback.print_exc()
# Display constrained contributions if loaded
if single_loaded:
print("\nSingle-site Constrained Contributions (Head):")
display(pmf_single.dfcontrib_c.head())
else:
print("\nSingle-site data not fully loaded.")
# Create PMF object for single site
print(f"\n--- Loading Single-site Data ({single_site_name}) ---")
pmf_single = None
single_loaded = False
try:
# Explicitly set multisites=False
pmf_single = PMF(site=single_site_name, reader="xlsx", BDIR=single_site_data_dir, multisites=False)
# Load all data
print("Loading all data components using read_all()...")
pmf_single.read.read_all()
print("\nFinished attempting read_all().")
# Verify data was loaded successfully
single_loaded = pmf_single.ensure_data_loaded()
print(f"\nData loaded check result: {single_loaded}")
except Exception as e:
print(f"\n Error loading single-site data: {e}")
traceback.print_exc()
# Display constrained contributions if loaded
if single_loaded:
print("\nSingle-site Constrained Contributions (Head):")
display(pmf_single.dfcontrib_c.head())
else:
print("\nSingle-site data not fully loaded.")
In [ ]:
Copied!
pmf_single.name = 'single'
pmf_single.name = 'single'
In [ ]:
Copied!
pmf_single.plot.plot_seasonal_contributions(stacked=True)
pmf_single.plot.plot_seasonal_contributions(stacked=True)
In [ ]:
Copied!
pmf_single.plot.plot_total_species_sum(station_list = [pmf_single],source = 'Primary traffic')
pmf_single.plot.plot_total_species_sum(station_list = [pmf_single],source = 'Primary traffic')
In [ ]:
Copied!
# Display single-site constrained profiles if loaded
if single_loaded:
print("\nSingle-site Constrained Profiles (Head):")
display(pmf_single.dfprofiles_c.head())
# Display single-site constrained profiles if loaded
if single_loaded:
print("\nSingle-site Constrained Profiles (Head):")
display(pmf_single.dfprofiles_c.head())
2.2 Explore available profiles and species (Single Site)¶
In [ ]:
Copied!
if single_loaded:
# List available profiles (factors)
if pmf_single.profiles:
print(f"Available Profiles (Factors): {pmf_single.profiles}")
print(f"Number of factors: {pmf_single.nprofiles}")
else:
print("Profiles not loaded.")
if pmf_single.totalVar:
print(f"\nTotal Variable: {pmf_single.totalVar}")
else:
print("\nTotal Variable not set.")
# Display first few species
if pmf_single.species:
print(f"\nFirst 10 species: {pmf_single.species[:10]}")
print(f"Total number of species: {pmf_single.nspecies}")
else:
print("\nSpecies list not available.")
else:
print("Single-site data not loaded.")
if single_loaded:
# List available profiles (factors)
if pmf_single.profiles:
print(f"Available Profiles (Factors): {pmf_single.profiles}")
print(f"Number of factors: {pmf_single.nprofiles}")
else:
print("Profiles not loaded.")
if pmf_single.totalVar:
print(f"\nTotal Variable: {pmf_single.totalVar}")
else:
print("\nTotal Variable not set.")
# Display first few species
if pmf_single.species:
print(f"\nFirst 10 species: {pmf_single.species[:10]}")
print(f"Total number of species: {pmf_single.nspecies}")
else:
print("\nSpecies list not available.")
else:
print("Single-site data not loaded.")
2.3 Basic data inspection (Single Site)¶
In [ ]:
Copied!
if single_loaded and hasattr(pmf_single, 'dfprofiles_b') and pmf_single.dfprofiles_b is not None:
print("Single-site Base Profiles (Head):")
display(pmf_single.dfprofiles_b.head())
if single_loaded and hasattr(pmf_single, 'dfprofiles_b') and pmf_single.dfprofiles_b is not None:
print("Single-site Base Profiles (Head):")
display(pmf_single.dfprofiles_b.head())
In [ ]:
Copied!
if single_loaded and hasattr(pmf_single, 'dfprofiles_c') and pmf_single.dfprofiles_c is not None:
print("Single-site Constrained Profiles (Head):")
display(pmf_single.dfprofiles_c.head())
if single_loaded and hasattr(pmf_single, 'dfprofiles_c') and pmf_single.dfprofiles_c is not None:
print("Single-site Constrained Profiles (Head):")
display(pmf_single.dfprofiles_c.head())
2.4 Convert contributions to µg/m³ (Single Site)¶
In [ ]:
Copied!
if single_loaded:
# Convert contributions to µg/m³ for the total variable
try:
contributions = pmf_single.to_cubic_meter()
print("Total Contributions (µg/m³):")
display(contributions.head())
except Exception as e:
print(f"Error calculating total contributions: {e}")
# Get contributions for a specific species
try:
if pmf_single.species and "SO42-" in pmf_single.species:
so4_contrib = pmf_single.to_cubic_meter(specie="SO42-")
print("\nSO4 contributions (µg/m³):")
display(so4_contrib.head())
else:
print("\nSO4 not found in species list or species list not available.")
except Exception as e:
print(f"Error calculating SO4 contributions: {e}")
else:
print("Single-site data not loaded, skipping contribution conversion.")
if single_loaded:
# Convert contributions to µg/m³ for the total variable
try:
contributions = pmf_single.to_cubic_meter()
print("Total Contributions (µg/m³):")
display(contributions.head())
except Exception as e:
print(f"Error calculating total contributions: {e}")
# Get contributions for a specific species
try:
if pmf_single.species and "SO42-" in pmf_single.species:
so4_contrib = pmf_single.to_cubic_meter(specie="SO42-")
print("\nSO4 contributions (µg/m³):")
display(so4_contrib.head())
else:
print("\nSO4 not found in species list or species list not available.")
except Exception as e:
print(f"Error calculating SO4 contributions: {e}")
else:
print("Single-site data not loaded, skipping contribution conversion.")
In [ ]:
Copied!
if single_loaded:
print("Single-site Species List:")
print(pmf_single.species)
if single_loaded:
print("Single-site Species List:")
print(pmf_single.species)
2.5 Calculate seasonal contributions (Single Site)¶
In [ ]:
Copied!
if single_loaded:
# Get seasonal contributions
try:
seasonal_contrib = pmf_single.get_seasonal_contribution(annual=True, normalize=True)
print("Normalized Seasonal Contributions (including Annual):")
display(seasonal_contrib)
except Exception as e:
print(f"Error calculating seasonal contributions: {e}")
else:
print("Single-site data not loaded, skipping seasonal calculation.")
if single_loaded:
# Get seasonal contributions
try:
seasonal_contrib = pmf_single.get_seasonal_contribution(annual=True, normalize=True)
print("Normalized Seasonal Contributions (including Annual):")
display(seasonal_contrib)
except Exception as e:
print(f"Error calculating seasonal contributions: {e}")
else:
print("Single-site data not loaded, skipping seasonal calculation.")
2.6 Calculate explained variation & temporal correlation (Single Site)¶
In [ ]:
Copied!
if single_loaded:
# Calculate explained variation
try:
explained_var = pmf_single.analysis.explained_variation()
print("Explained Variation (Head):")
display(explained_var.head())
except Exception as e:
print(f"Error calculating explained variation: {e}")
# Calculate factor temporal correlation
try:
temporal_corr = pmf_single.analysis.factor_temporal_correlation()
print("\nFactor Temporal Correlation:")
display(temporal_corr)
except Exception as e:
print(f"Error calculating temporal correlation: {e}")
else:
print("Single-site data not loaded, skipping analysis.")
if single_loaded:
# Calculate explained variation
try:
explained_var = pmf_single.analysis.explained_variation()
print("Explained Variation (Head):")
display(explained_var.head())
except Exception as e:
print(f"Error calculating explained variation: {e}")
# Calculate factor temporal correlation
try:
temporal_corr = pmf_single.analysis.factor_temporal_correlation()
print("\nFactor Temporal Correlation:")
display(temporal_corr)
except Exception as e:
print(f"Error calculating temporal correlation: {e}")
else:
print("Single-site data not loaded, skipping analysis.")
2.7 Advanced analysis (Single Site)¶
In [ ]:
Copied!
if single_loaded:
# Analyze factor profiles using correlation
try:
factor_analysis = pmf_single.analysis.analyze_factor_profiles(method="correlation")
# Display correlation matrix
print("Factor Profile Correlation Matrix:")
display(factor_analysis['correlation_matrix'])
except Exception as e:
print(f"Error analyzing factor profiles: {e}")
# Detect potentially mixed factors
try:
mixed_factors = pmf_single.analysis.detect_mixed_factors(threshold=0.5)
print(f"\nPotentially mixed factors (threshold=0.5): {mixed_factors}")
except Exception as e:
print(f"Error detecting mixed factors: {e}")
else:
print("Single-site data not loaded, skipping advanced analysis.")
if single_loaded:
# Analyze factor profiles using correlation
try:
factor_analysis = pmf_single.analysis.analyze_factor_profiles(method="correlation")
# Display correlation matrix
print("Factor Profile Correlation Matrix:")
display(factor_analysis['correlation_matrix'])
except Exception as e:
print(f"Error analyzing factor profiles: {e}")
# Detect potentially mixed factors
try:
mixed_factors = pmf_single.analysis.detect_mixed_factors(threshold=0.5)
print(f"\nPotentially mixed factors (threshold=0.5): {mixed_factors}")
except Exception as e:
print(f"Error detecting mixed factors: {e}")
else:
print("Single-site data not loaded, skipping advanced analysis.")
3. Visualizations for Single Site Data¶
3.1 Plot factor profiles (Single Site)¶
In [ ]:
Copied!
pmf_single.plot.plot_time_series(stacked = True)
pmf_single.plot.plot_time_series(stacked = True)
In [ ]:
Copied!
if single_loaded:
# Plot factor profiles
try:
print("Plotting single-site factor profiles...")
fig = pmf_single.plot.plot_factor_profiles(normalize=True, log_scale=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting factor profiles: {e}")
else:
print("Single-site data not loaded, skipping plot.")
if single_loaded:
# Plot factor profiles
try:
print("Plotting single-site factor profiles...")
fig = pmf_single.plot.plot_factor_profiles(normalize=True, log_scale=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting factor profiles: {e}")
else:
print("Single-site data not loaded, skipping plot.")
3.2 Plot time series contribution (Single Site)¶
In [ ]:
Copied!
if single_loaded:
# Plot with rolling mean
try:
print("Plotting single-site time series (rolling mean)...")
fig = pmf_single.plot.plot_time_series(rolling_mean=7)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting time series (rolling): {e}")
else:
print("Single-site data not loaded, skipping plot.")
if single_loaded:
# Plot with rolling mean
try:
print("Plotting single-site time series (rolling mean)...")
fig = pmf_single.plot.plot_time_series(rolling_mean=7)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting time series (rolling): {e}")
else:
print("Single-site data not loaded, skipping plot.")
In [ ]:
Copied!
if single_loaded:
# Plot with rolling mean
try:
print("Plotting single-site time series (rolling mean)...")
fig = pmf_single.plot.plot_contributions_timeseries(stacked=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting time series (rolling): {e}")
else:
print("Single-site data not loaded, skipping plot.")
if single_loaded:
# Plot with rolling mean
try:
print("Plotting single-site time series (rolling mean)...")
fig = pmf_single.plot.plot_contributions_timeseries(stacked=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting time series (rolling): {e}")
else:
print("Single-site data not loaded, skipping plot.")
3.3 Plot seasonal contribution pattern (Single Site)¶
In [ ]:
Copied!
if single_loaded:
# Plot seasonal contributions
try:
print("Plotting single-site seasonal contributions...")
# The function returns the dataframe used for plotting
df_seasonal_plot = pmf_single.plot.plot_seasonal_contribution(normalize=False, annual=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting seasonal contribution: {e}")
else:
print("Single-site data not loaded, skipping plot.")
if single_loaded:
# Plot seasonal contributions
try:
print("Plotting single-site seasonal contributions...")
# The function returns the dataframe used for plotting
df_seasonal_plot = pmf_single.plot.plot_seasonal_contribution(normalize=False, annual=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting seasonal contribution: {e}")
else:
print("Single-site data not loaded, skipping plot.")
3.4 Plot stacked profiles (Single Site)¶
In [ ]:
Copied!
if single_loaded:
# Plot stacked profiles
try:
print("Plotting single-site stacked profiles...")
fig = pmf_single.plot.plot_stacked_profiles(constrained=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting stacked profiles: {e}")
else:
print("Single-site data not loaded, skipping plot.")
if single_loaded:
# Plot stacked profiles
try:
print("Plotting single-site stacked profiles...")
fig = pmf_single.plot.plot_stacked_profiles(constrained=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting stacked profiles: {e}")
else:
print("Single-site data not loaded, skipping plot.")
3.5 Plot samples contributions (Single Site)¶
In [ ]:
Copied!
if single_loaded:
# Plot contribution per sample over time
try:
print("Plotting single-site sample contributions...")
fig = pmf_single.plot.plot_samples_sources_contribution(constrained=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting sample contributions: {e}")
else:
print("Single-site data not loaded, skipping plot.")
if single_loaded:
# Plot contribution per sample over time
try:
print("Plotting single-site sample contributions...")
fig = pmf_single.plot.plot_samples_sources_contribution(constrained=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting sample contributions: {e}")
else:
print("Single-site data not loaded, skipping plot.")
3.6 Plot per microgram profile (Single Site)¶
In [ ]:
Copied!
if single_loaded:
# Plot per microgram profile
try:
print("Plotting single-site per microgram profile...")
fig = pmf_single.plot.plot_per_microgram(constrained=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting per microgram profile: {e}")
else:
print("Single-site data not loaded, skipping plot.")
if single_loaded:
# Plot per microgram profile
try:
print("Plotting single-site per microgram profile...")
fig = pmf_single.plot.plot_per_microgram(constrained=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting per microgram profile: {e}")
else:
print("Single-site data not loaded, skipping plot.")
3.7 Plot individual factor profile with uncertainties (Single Site)¶
In [ ]:
Copied!
if single_loaded:
# Select a factor to plot in detail
if pmf_single.profiles:
factor_to_plot = pmf_single.profiles[0]
print(f"Plotting details for factor: {factor_to_plot}")
# Plot profile uncertainty
try:
fig = pmf_single.plot.plot_profile_uncertainty(profile=factor_to_plot, constrained=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting profile uncertainty: {e}")
else:
print("No profiles loaded to plot.")
else:
print("Single-site data not loaded, skipping plot.")
if single_loaded:
# Select a factor to plot in detail
if pmf_single.profiles:
factor_to_plot = pmf_single.profiles[0]
print(f"Plotting details for factor: {factor_to_plot}")
# Plot profile uncertainty
try:
fig = pmf_single.plot.plot_profile_uncertainty(profile=factor_to_plot, constrained=True)
plt.tight_layout()
plt.show()
except Exception as e:
print(f"Error plotting profile uncertainty: {e}")
else:
print("No profiles loaded to plot.")
else:
print("Single-site data not loaded, skipping plot.")
3.8 Plot source profile details (Single Site)¶
In [ ]:
Copied!
if single_loaded:
# Plot detailed source profile
if pmf_single.profiles:
factor_to_plot = pmf_single.profiles[0]
try:
print(f"Plotting source profile details for: {factor_to_plot}")
# This function might generate multiple plots or print output
pmf_single.plot.plot_source_profile(profile=factor_to_plot, constrained=True)
# No plt.show() needed if the function handles it
except Exception as e:
print(f"Error plotting source profile details: {e}")
else:
print("No profiles loaded to plot.")
else:
print("Single-site data not loaded, skipping plot.")
if single_loaded:
# Plot detailed source profile
if pmf_single.profiles:
factor_to_plot = pmf_single.profiles[0]
try:
print(f"Plotting source profile details for: {factor_to_plot}")
# This function might generate multiple plots or print output
pmf_single.plot.plot_source_profile(profile=factor_to_plot, constrained=True)
# No plt.show() needed if the function handles it
except Exception as e:
print(f"Error plotting source profile details: {e}")
else:
print("No profiles loaded to plot.")
else:
print("Single-site data not loaded, skipping plot.")
3.9 Plot comprehensive profile information (Single Site)¶
4. Multi-Site Analysis & Visualizations¶
4.1 Explore multi-site data (if loaded)¶
In [ ]:
Copied!
# Check if multi-site data was loaded successfully
if multi_loaded:
print(f"--- Exploring Multi-site Data ({multi_site_name}) ---")
if pmf_multi.profiles:
print(f"Available Profiles (Factors): {pmf_multi.profiles}")
print(f"Number of factors: {pmf_multi.nprofiles}")
if pmf_multi.totalVar:
print(f"\nTotal Variable: {pmf_multi.totalVar}")
# Display first few species
if hasattr(pmf_multi, 'species') and pmf_multi.species is not None:
print(f"\nFirst 10 species: {pmf_multi.species[:10]}")
print(f"Total number of species: {pmf_multi.nspecies}")
# Show profile data
if hasattr(pmf_multi, 'dfprofiles_c') and pmf_multi.dfprofiles_c is not None and not pmf_multi.dfprofiles_c.empty:
print("\nFactor Profiles (constrained) - Head:")
display(pmf_multi.dfprofiles_c.head())
else:
print("\nMulti-site constrained profiles not loaded or empty.")
# Show contributions data if loaded
if hasattr(pmf_multi, 'dfcontrib_c') and pmf_multi.dfcontrib_c is not None and not pmf_multi.dfcontrib_c.empty:
print("\nFactor Contributions (constrained) - Head:")
display(pmf_multi.dfcontrib_c.head())
# Show unique stations
if 'Station' in pmf_multi.dfcontrib_c.columns:
print(f"\nUnique stations found: {pmf_multi.dfcontrib_c['Station'].unique().tolist()}")
else:
print("\nMulti-site constrained contributions not loaded or empty.")
else:
print("Multi-site data was not loaded successfully. Skipping multi-site exploration.")
# Check if multi-site data was loaded successfully
if multi_loaded:
print(f"--- Exploring Multi-site Data ({multi_site_name}) ---")
if pmf_multi.profiles:
print(f"Available Profiles (Factors): {pmf_multi.profiles}")
print(f"Number of factors: {pmf_multi.nprofiles}")
if pmf_multi.totalVar:
print(f"\nTotal Variable: {pmf_multi.totalVar}")
# Display first few species
if hasattr(pmf_multi, 'species') and pmf_multi.species is not None:
print(f"\nFirst 10 species: {pmf_multi.species[:10]}")
print(f"Total number of species: {pmf_multi.nspecies}")
# Show profile data
if hasattr(pmf_multi, 'dfprofiles_c') and pmf_multi.dfprofiles_c is not None and not pmf_multi.dfprofiles_c.empty:
print("\nFactor Profiles (constrained) - Head:")
display(pmf_multi.dfprofiles_c.head())
else:
print("\nMulti-site constrained profiles not loaded or empty.")
# Show contributions data if loaded
if hasattr(pmf_multi, 'dfcontrib_c') and pmf_multi.dfcontrib_c is not None and not pmf_multi.dfcontrib_c.empty:
print("\nFactor Contributions (constrained) - Head:")
display(pmf_multi.dfcontrib_c.head())
# Show unique stations
if 'Station' in pmf_multi.dfcontrib_c.columns:
print(f"\nUnique stations found: {pmf_multi.dfcontrib_c['Station'].unique().tolist()}")
else:
print("\nMulti-site constrained contributions not loaded or empty.")
else:
print("Multi-site data was not loaded successfully. Skipping multi-site exploration.")
4.2 Multi-site visualizations (if data loaded)¶
In [ ]:
Copied!
# Only run this if multi-site data was successfully loaded
if multi_loaded:
print("--- Multi-site Visualizations ---")
try:
# Plot factor profiles (often the same across sites in multi-site PMF)
if hasattr(pmf_multi, 'dfprofiles_c') and pmf_multi.dfprofiles_c is not None and not pmf_multi.dfprofiles_c.empty:
print("\nPlotting Multi-site Factor Profiles:")
fig = pmf_multi.plot.plot_factor_profiles(normalize=True)
plt.tight_layout()
plt.show()
else:
print("\nSkipping profile plot as constrained profiles are not available.")
except Exception as e:
print(f"Error plotting multi-site data: {str(e)}")
traceback.print_exc()
else:
print("Skipping multi-site visualizations as data was not loaded successfully.")
# Only run this if multi-site data was successfully loaded
if multi_loaded:
print("--- Multi-site Visualizations ---")
try:
# Plot factor profiles (often the same across sites in multi-site PMF)
if hasattr(pmf_multi, 'dfprofiles_c') and pmf_multi.dfprofiles_c is not None and not pmf_multi.dfprofiles_c.empty:
print("\nPlotting Multi-site Factor Profiles:")
fig = pmf_multi.plot.plot_factor_profiles(normalize=True)
plt.tight_layout()
plt.show()
else:
print("\nSkipping profile plot as constrained profiles are not available.")
except Exception as e:
print(f"Error plotting multi-site data: {str(e)}")
traceback.print_exc()
else:
print("Skipping multi-site visualizations as data was not loaded successfully.")
5. Comparison between Sites (if both loaded)¶
In [ ]:
Copied!
# Only run this if both datasets were successfully loaded
if single_loaded and multi_loaded:
print("--- Comparing Single-site vs Multi-site Data ---")
try:
# Find common species between the two sites
common_species = set(pmf_single.species).intersection(set(pmf_multi.species))
print(f"\nCommon species between sites: {len(common_species)}")
if len(common_species) > 0 and pmf_single.profiles and pmf_multi.profiles:
# Compare profiles using common species
species_list = list(common_species)
# Select first factor from each (ensure factors exist)
# WARNING: Factors might not correspond directly! This is just an example.
single_factor = pmf_single.profiles[0]
multi_factor = pmf_multi.profiles[0]
print(f"\nComparing profile '{single_factor}' (single) vs '{multi_factor}' (multi) - CAUTION: Factors may not match!")
# Single site profile for the factor
single_profile = pmf_single.dfprofiles_c[single_factor].loc[species_list]
# Multi site profile for the factor
multi_profile = pmf_multi.dfprofiles_c[multi_factor].loc[species_list]
# Calculate similarity metrics
similarity_metrics = compute_similarity_metrics(single_profile, multi_profile)
print(f"\nSimilarity metrics between '{single_factor}' and '{multi_factor}':")
for metric, value in similarity_metrics.items():
print(f"- {metric}: {value:.4f}")
# Plot comparison (scatter plot)
plt.figure(figsize=(10, 6))
plt.scatter(single_profile, multi_profile, alpha=0.7)
plt.xscale('log')
plt.yscale('log')
plt.xlabel(f"{single_site_name} - {single_factor} (µg/µg {pmf_single.totalVar or ''})")
plt.ylabel(f"{multi_site_name} - {multi_factor} (µg/µg {pmf_multi.totalVar or ''})")
plt.title(f"Profile Comparison: {single_factor} vs {multi_factor}")
# Add 1:1 line for reference
valid_vals = single_profile.fillna(0) + multi_profile.fillna(0)
min_val = (valid_vals[valid_vals > 0]).min() * 0.5 # Avoid log(0)
max_val = valid_vals.max() * 1.5
if pd.notna(min_val) and pd.notna(max_val) and min_val > 0:
plt.plot([min_val, max_val], [min_val, max_val], 'k--', alpha=0.5, label='1:1 Line')
plt.xlim(min_val, max_val)
plt.ylim(min_val, max_val)
# Add labels for a few key species (optional, can get crowded)
# for sp in species_list:
# sp_single_val = single_profile.get(sp)
# sp_multi_val = multi_profile.get(sp)
# if pd.notna(sp_single_val) and pd.notna(sp_multi_val) and (sp_single_val > 0.01 or sp_multi_val > 0.01): # Threshold for labeling
# plt.annotate(sp, (sp_single_val, sp_multi_val), textcoords="offset points", xytext=(0,5), ha='center', fontsize=8)
plt.legend()
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tight_layout()
plt.show()
else:
print("Cannot compare profiles: No common species or profiles are missing.")
except Exception as e:
print(f"Error comparing sites: {str(e)}")
traceback.print_exc()
else:
print("Skipping site comparison as one or both datasets were not loaded successfully.")
# Only run this if both datasets were successfully loaded
if single_loaded and multi_loaded:
print("--- Comparing Single-site vs Multi-site Data ---")
try:
# Find common species between the two sites
common_species = set(pmf_single.species).intersection(set(pmf_multi.species))
print(f"\nCommon species between sites: {len(common_species)}")
if len(common_species) > 0 and pmf_single.profiles and pmf_multi.profiles:
# Compare profiles using common species
species_list = list(common_species)
# Select first factor from each (ensure factors exist)
# WARNING: Factors might not correspond directly! This is just an example.
single_factor = pmf_single.profiles[0]
multi_factor = pmf_multi.profiles[0]
print(f"\nComparing profile '{single_factor}' (single) vs '{multi_factor}' (multi) - CAUTION: Factors may not match!")
# Single site profile for the factor
single_profile = pmf_single.dfprofiles_c[single_factor].loc[species_list]
# Multi site profile for the factor
multi_profile = pmf_multi.dfprofiles_c[multi_factor].loc[species_list]
# Calculate similarity metrics
similarity_metrics = compute_similarity_metrics(single_profile, multi_profile)
print(f"\nSimilarity metrics between '{single_factor}' and '{multi_factor}':")
for metric, value in similarity_metrics.items():
print(f"- {metric}: {value:.4f}")
# Plot comparison (scatter plot)
plt.figure(figsize=(10, 6))
plt.scatter(single_profile, multi_profile, alpha=0.7)
plt.xscale('log')
plt.yscale('log')
plt.xlabel(f"{single_site_name} - {single_factor} (µg/µg {pmf_single.totalVar or ''})")
plt.ylabel(f"{multi_site_name} - {multi_factor} (µg/µg {pmf_multi.totalVar or ''})")
plt.title(f"Profile Comparison: {single_factor} vs {multi_factor}")
# Add 1:1 line for reference
valid_vals = single_profile.fillna(0) + multi_profile.fillna(0)
min_val = (valid_vals[valid_vals > 0]).min() * 0.5 # Avoid log(0)
max_val = valid_vals.max() * 1.5
if pd.notna(min_val) and pd.notna(max_val) and min_val > 0:
plt.plot([min_val, max_val], [min_val, max_val], 'k--', alpha=0.5, label='1:1 Line')
plt.xlim(min_val, max_val)
plt.ylim(min_val, max_val)
# Add labels for a few key species (optional, can get crowded)
# for sp in species_list:
# sp_single_val = single_profile.get(sp)
# sp_multi_val = multi_profile.get(sp)
# if pd.notna(sp_single_val) and pd.notna(sp_multi_val) and (sp_single_val > 0.01 or sp_multi_val > 0.01): # Threshold for labeling
# plt.annotate(sp, (sp_single_val, sp_multi_val), textcoords="offset points", xytext=(0,5), ha='center', fontsize=8)
plt.legend()
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tight_layout()
plt.show()
else:
print("Cannot compare profiles: No common species or profiles are missing.")
except Exception as e:
print(f"Error comparing sites: {str(e)}")
traceback.print_exc()
else:
print("Skipping site comparison as one or both datasets were not loaded successfully.")
In [ ]:
Copied!
pmf_single.get_total_species_sum().index
pmf_multi.get_total_species_sum().index
pmf_single.get_total_species_sum().index
pmf_multi.get_total_species_sum().index
In [ ]:
Copied!
pmf_single.name = "single"
pmf_multi.name = "multi"
pmf_single.name = "single"
pmf_multi.name = "multi"
In [ ]:
Copied!
pmf_single.plot.plot_contribution_summary()
pmf_single.plot.plot_contribution_summary()
In [ ]:
Copied!