From e75a5fdb957f447997bfeaac9b286a59ec80d46f Mon Sep 17 00:00:00 2001
From: Mauro <mauro.farina99@gmail.com>
Date: Sun, 16 Nov 2025 11:45:52 +0100
Subject: [PATCH 1/2] Add stats for CDF and boxplots

---
 fastplot/__init__.py | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/fastplot/__init__.py b/fastplot/__init__.py
index 3f35708..752ff40 100644
--- a/fastplot/__init__.py
+++ b/fastplot/__init__.py
@@ -47,7 +47,7 @@ def plot(data, path, mode = 'line',
          linewidth = 1, boxplot_whis=[5,95], timeseries_format='%Y/%m/%d', bars_width=0.6,
          boxplot_numerousness = False, boxplot_numerousness_fontsize = 'x-small', boxplot_fliersize=0,
          boxplot_palette=sns.color_palette(), boxplot_empty=False, boxplot_numerousness_rotate=None,
-         callback = None, timeseries_stacked_right_legend_order=True, CDF_complementary=False, vlines=None, hlines=None, vlines_style={}, hlines_style={}):
+         callback = None, timeseries_stacked_right_legend_order=True, CDF_complementary=False, vlines=None, hlines=None, vlines_style={}, hlines_style={}, stats=None):
 
     # 1. Create and configure plot visual style
     plt.rcParams.update(plt.rcParamsDefault)
@@ -121,6 +121,11 @@ def plot(data, path, mode = 'line',
         if ylim is None:
             ylim = (0,1)
 
+        if isinstance(stats, dict):
+            clear_dict(stats)
+            for k, v in get_distribution_stats(data).items():
+                stats[k] = v
+
     elif mode == 'CDF_multi':
         for s_name, s in data :
             e = ECDF(s)
@@ -149,6 +154,13 @@ def plot(data, path, mode = 'line',
         if ylim is None:
             ylim = (0,1)
 
+        if isinstance(stats, dict):
+            clear_dict(stats)
+            for name, samples in data:
+                stats[name] = {}
+                for k, v in get_distribution_stats(samples).items():
+                    stats[name][k] = v
+
     elif mode == 'boxplot':
         labels = [e[0] for e in data]
         samples = [e[1] for e in data]
@@ -177,6 +189,13 @@ def plot(data, path, mode = 'line',
             plt.setp(plt.gca().patches, edgecolor = 'black', facecolor='white', linewidth =1)
             plt.setp(plt.gca().lines, color='black', linewidth =1)
 
+        if isinstance(stats, dict):
+            clear_dict(stats)
+            for name, samples in data:
+                stats[name] = {}
+                for k, v in get_distribution_stats(samples).items():
+                    stats[name][k] = v
+
     elif mode == 'boxplot_multi':
         new_data = []
         for c in data:
@@ -189,6 +208,13 @@ def plot(data, path, mode = 'line',
         p.legend().remove()
         plt.xlabel("")
         plt.gca().set_xticklabels(data.index)
+
+        if isinstance(stats, dict):
+            clear_dict(stats)
+            for c in data:
+                stats[c] = {}
+                for index, values in data[c].items():
+                    stats[c][index] = get_distribution_stats(values)
         
     elif mode == 'timeseries':
         plt.plot(data, markeredgewidth=0, linewidth = linewidth, **plot_args) 
@@ -340,6 +366,22 @@ def tex_escape(text):
     regex = re.compile('|'.join(re.escape(str(key)) for key in sorted(conv.keys(), key = lambda item: - len(item))))
     return regex.sub(lambda match: conv[match.group()], text)
 
+
+def get_distribution_stats(data):
+    """Return a dict with the min, q1, median, q3, max, mean and std of `data`."""
+    return {
+        'min': np.min(data),
+        'q1': np.percentile(data, 25),
+        'median': np.median(data),
+        'q3': np.percentile(data, 75),
+        'max': np.max(data),
+        'mean': np.mean(data), 'std': np.std(data),
+    }
+
+def clear_dict(d):
+    """Remove every entry from `d` in place, so existing references see it emptied."""
+    d.clear()
+
 def gini(arr):
     count = arr.size
     coefficient = 2 / count

From 8b1a214760e6c2713b2091f9c07bdf375a17351b Mon Sep 17 00:00:00 2001
From: Mauro <mauro.farina99@gmail.com>
Date: Mon, 17 Nov 2025 11:01:17 +0100
Subject: [PATCH 2/2] Add matplotlib ecdf

---
 fastplot/__init__.py | 83 +++++++++++++++++++++++++-------------------
 requirements.txt     |  1 +
 setup.py             |  2 +-
 3 files changed, 50 insertions(+), 36 deletions(-)

diff --git a/fastplot/__init__.py b/fastplot/__init__.py
index 752ff40..9e36563 100644
--- a/fastplot/__init__.py
+++ b/fastplot/__init__.py
@@ -10,8 +10,12 @@
 import numpy as np
 import pandas as pd
 import re
+from packaging import version
 from statsmodels.distributions.empirical_distribution import ECDF
 
+MPL_VERSION = mpl.__version__
+HAS_ECDF_PLOT = version.parse(MPL_VERSION) >= version.parse('3.8.0')  # 3.8.0 was released in September 2023
+
 # Register Pandas Converters
 from pandas.plotting import register_matplotlib_converters
 register_matplotlib_converters()
@@ -96,38 +100,12 @@ def plot(data, path, mode = 'line',
         plt.plot(data[0], data[1], markeredgewidth=0, linewidth = linewidth, **plot_args) 
 
     elif mode == 'CDF':
-        s = data
-        e = ECDF(s)
-        if xscale == 'log':
-            x = np.logspace(np.log10(min(s)), np.log10(max(s)), NUM_BIN_CDF )
-            if CDF_complementary:
-                y = 1-e(x)
-            else:
-                y = e(x)
+        if HAS_ECDF_PLOT:
+            # Use plt.ecdf for matplotlib 3.8+
+            plt.ecdf(data, complementary=CDF_complementary, linewidth=linewidth, **plot_args)
         else:
-            x = np.linspace(min(s), max(s), NUM_BIN_CDF )  
-            if CDF_complementary:
-                y = 1-e(x)
-                x = np.concatenate( (np.array([min(s)]), x) )
-                y = np.concatenate( (np.array([1]), y) )
-            else:
-                y = e(x)
-                x = np.concatenate( (np.array([min(s)]), x) )
-                y = np.concatenate( (np.array([0]), y) )
-
-        plt.plot(x,y, linewidth = linewidth, **plot_args)
-        if ylabel is None:
-            ylabel = 'CCDF' if CDF_complementary else "CDF"
-        if ylim is None:
-            ylim = (0,1)
-
-        if isinstance(stats, dict):
-            clear_dict(stats)
-            for k, v in get_distribution_stats(data).items():
-                stats[k] = v
-
-    elif mode == 'CDF_multi':
-        for s_name, s in data :
+            # Fallback to statsmodels for older matplotlib versions
+            s = data
             e = ECDF(s)
             if xscale == 'log':
                 x = np.logspace(np.log10(min(s)), np.log10(max(s)), NUM_BIN_CDF )
@@ -137,7 +115,6 @@ def plot(data, path, mode = 'line',
                     y = e(x)
             else:
                 x = np.linspace(min(s), max(s), NUM_BIN_CDF )  
-
                 if CDF_complementary:
                     y = 1-e(x)
                     x = np.concatenate( (np.array([min(s)]), x) )
@@ -147,7 +124,44 @@ def plot(data, path, mode = 'line',
                     x = np.concatenate( (np.array([min(s)]), x) )
                     y = np.concatenate( (np.array([0]), y) )
 
-            plt.plot(x,y, label=s_name, linewidth = linewidth, **plot_args)
+            plt.plot(x,y, linewidth = linewidth, **plot_args)
+        if ylabel is None:
+            ylabel = 'CCDF' if CDF_complementary else "CDF"
+        if ylim is None:
+            ylim = (0,1)
+
+        if isinstance(stats, dict):
+            clear_dict(stats)
+            for k, v in get_distribution_stats(data).items():
+                stats[k] = v
+
+    elif mode == 'CDF_multi':
+        for s_name, s in data:
+            if HAS_ECDF_PLOT:
+                # Use plt.ecdf for matplotlib 3.8+
+                plt.ecdf(s, label=s_name, complementary=CDF_complementary, linewidth=linewidth, **plot_args)
+            else:
+                # Fallback for older matplotlib versions
+                e = ECDF(s)
+                if xscale == 'log':
+                    x = np.logspace(np.log10(min(s)), np.log10(max(s)), NUM_BIN_CDF )
+                    if CDF_complementary:
+                        y = 1-e(x)
+                    else:
+                        y = e(x)
+                else:
+                    x = np.linspace(min(s), max(s), NUM_BIN_CDF )  
+
+                    if CDF_complementary:
+                        y = 1-e(x)
+                        x = np.concatenate( (np.array([min(s)]), x) )
+                        y = np.concatenate( (np.array([1]), y) )
+                    else:
+                        y = e(x)
+                        x = np.concatenate( (np.array([min(s)]), x) )
+                        y = np.concatenate( (np.array([0]), y) )
+
+                plt.plot(x,y, label=s_name, linewidth = linewidth, **plot_args)
 
         if ylabel is None:
             ylabel = 'CCDF' if CDF_complementary else "CDF"
@@ -407,5 +421,4 @@ def lorenz_gini_multi(data, name_format="{} (GI={:0.2f})"):
         name_new = name_format.format(name, gini_index)
         data_new.append( (name_new, (lorenz_x,lorenz_y) )   )
     return data_new
-  
-
+  
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 32296f5..68131fe 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ pandas
 statsmodels
 scipy
 seaborn
+packaging
diff --git a/setup.py b/setup.py
index d0c077a..5fb3a6b 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
     url="https://github.com/marty90/fastplot",
     download_url = 'https://github.com/marty90/fastplot/tarball/1.5.0',
     packages=['fastplot'],
-    install_requires=['matplotlib', 'numpy', 'pandas', 'statsmodels', 'scipy', 'seaborn']
+    install_requires=['matplotlib', 'numpy', 'pandas', 'statsmodels', 'scipy', 'seaborn', 'packaging']
 )
 
 # Upload on pip with: