import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from google.colab import drive
# Mount Google Drive at /content/drive, asking for a remount on every run.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


# Load the raw CSV from the mounted Drive. It holds a "time" column plus
# MB, SMB, D and BMB, each with a matching "*_err" column.
# A plain string literal is used: the path has no placeholders to format.
df = pd.read_csv("/content/drive/MyDrive/DataScience/Data/MB_SMB_D_BMB.csv")
df.head()


# Column dtypes and non-null counts; "time" is still a plain object column here.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13629 entries, 0 to 13628
Data columns (total 9 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   time     13629 non-null  object 
 1   MB       13629 non-null  float64
 2   MB_err   13629 non-null  float64
 3   SMB      13629 non-null  float64
 4   SMB_err  13629 non-null  float64
 5   D        13629 non-null  float64
 6   D_err    13629 non-null  float64
 7   BMB      13629 non-null  float64
 8   BMB_err  13629 non-null  float64
dtypes: float64(8), object(1)
memory usage: 958.4+ KB


# Parse the "time" strings into datetimes so the calendar year can be read off.
df["time"] = pd.to_datetime(df["time"])

# Keep only the four balance components and tag every row with its year.
dfs = df[["MB", "SMB", "BMB", "D"]].assign(year=df["time"].dt.year)

# One row per year: the mean of each component, indexed by year.
dfm = dfs.groupby("year").mean()


dfm.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 183 entries, 1840 to 2022
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   MB      183 non-null    float64
 1   SMB     183 non-null    float64
 2   BMB     183 non-null    float64
 3   D       183 non-null    float64
dtypes: float64(4)
memory usage: 7.1 KB


# Box plots of the yearly means, one panel per variable, used to spot outliers.
bold = {"fontsize": 'large', "fontweight": 'bold'}
box_panels = [
    ("D", "Discharge"),
    ("MB", "Mass Balance"),
    ("SMB", "Surface Mass Balance"),
    ("BMB", "Basal Mass Balance"),
]

fig, ax = plt.subplots(2,2, figsize=(12,12))
fig.suptitle("Outlier Analysis", **bold)
fig.tight_layout(pad=5.0)

# ax.flat walks the 2x2 grid row by row, matching the panel order above.
for axis, (column, title) in zip(ax.flat, box_panels):
    axis.boxplot(dfm[column])
    axis.set_title(title, **bold)
    axis.set_ylabel(column, **bold)

plt.show()


# Drop the most extreme yearly means (presumably the points that stand out in
# the box plots; confirm against the figure).
# Each rule is (column, rank in ascending order?, ranks to drop).
outlier_rules = [
    ("MB", True, [1.0, 2.0, 3.0]),  # three lowest mass-balance years
    ("D", False, [1.0]),            # highest discharge year
    ("SMB", False, [1.0]),          # highest surface-mass-balance year
    ("BMB", True, [1.0]),           # lowest basal-mass-balance year
]
for column, ascending, dropped_ranks in outlier_rules:
    # Ranks are recomputed on the already-trimmed frame, so each rule sees
    # only the rows that survived the previous rules.
    dfm["rank"] = dfm[column].rank(ascending=ascending)
    # .copy() makes the filtered result an independent frame, so assigning a
    # column to it afterwards does not raise SettingWithCopyWarning.
    dfm = dfm[~dfm["rank"].isin(dropped_ranks)].copy()

# The helper "rank" column (last computed from BMB) stays in the frame,
# which is why the shape reports five columns.
dfm.shape

(177, 5)


# Box plots of the yearly means again, now on the trimmed frame.
trimmed_panels = (
    ("D", "Discharge"),
    ("MB", "Mass Balance"),
    ("SMB", "Surface Mass Balance"),
    ("BMB", "Basal Mass Balance"),
)

fig, ax = plt.subplots(2, 2, figsize=(12, 12))
fig.suptitle("Distribution of features")
fig.tight_layout(pad=5.0)

# Row-major walk over the 2x2 grid: D, MB on top; SMB, BMB below.
for axis, (column, title) in zip(ax.flat, trimmed_panels):
    axis.boxplot(dfm[column])
    axis.set_title(title)
    axis.set_ylabel(column)

plt.show()


dfm.shape

(177, 5)


# Histograms of the yearly means, one panel per variable.
bold = {"fontsize": 'large', "fontweight": 'bold'}
hist_panels = [
    ("D", "Discharge"),
    ("MB", "Mass Balance"),
    ("SMB", "Surface Mass Balance"),
    ("BMB", "Basal Mass Balance"),
]

fig, ax = plt.subplots(2,2, figsize=(12,12))
fig.suptitle("Distribution of features", **bold)
fig.tight_layout(pad=5.0)

for axis, (column, name) in zip(ax.flat, hist_panels):
    axis.hist(dfm[column])
    axis.set_title(f"{name} Distribution", **bold)
    axis.set_xlabel(column, **bold)
    # hist() is called without density=True, so the bars are raw bin counts;
    # the axis is labelled as a frequency rather than a density.
    axis.set_ylabel("Frequency", **bold)
plt.show()


# Bar chart of the yearly mean mass balance: negative years in red, the rest in green.
mask = dfm["MB"] < 0
bold = {"fontsize": 'large', "fontweight": 'bold'}

plt.figure(figsize=(15, 10))
for selector, bar_color, legend_label in (
    (mask, 'red', "Ice Loss"),
    (~mask, 'green', "Ice Gain"),
):
    yearly_mb = dfm.loc[selector, "MB"]
    plt.bar(yearly_mb.index, yearly_mb, color=bar_color, label=legend_label)

# Zero line separating loss from gain.
plt.axhline(y=0, linestyle='-', color='black')

plt.title("Yearly Mass Balance of Ice", **bold)
plt.xlabel('Year', **bold)
plt.ylabel('Mass Balance', **bold)
plt.xticks(np.arange(1842, 2023, 10), rotation=90)
plt.legend()
plt.show()


# Pie chart: share of years whose mean mass balance is negative ("Ice loss")
# versus non-negative ("Ice gain").
plt.figure(figsize = (10,10))
# plt.title("Percentage of Years with ICE LOSS and ICE GAIN during time period 1840 to 2022",fontsize='large', fontweight='bold')
categories = ["Ice loss", "Ice gain"]
colors = ['#e83845','#32CD32']  # red for loss, green for gain (same order as categories)
dfm["ice"] = np.where(dfm["MB"]<0, "Ice loss", "Ice gain")

# value_counts() sorts by descending count, so its order depends on the data.
# Reindexing to a fixed category order keeps every wedge paired with its own
# label and colour; a category with no years gets an explicit zero.
ice_counts = dfm["ice"].value_counts().reindex(categories, fill_value=0)

patches, texts, autotexts = plt.pie(
    ice_counts,
    labels=categories,
    autopct='%.1f%% of Years',
    pctdistance=0.8,
    counterclock=True,
    startangle=90,
    colors=colors,
)
# Enlarge both the category labels and the percentage captions.
for text in (*texts, *autotexts):
    text.set_fontsize(20)
plt.show()


# Three stacked line plots over a shared year axis:
# discharge on top, MB and SMB in the middle, BMB at the bottom.
fig, ax = plt.subplots(3, 1, figsize=(15, 10), sharex=True)
fig.suptitle("Distribution of features")
fig.tight_layout(pad=5.0)

discharge_ax, balance_ax, basal_ax = ax

# Top panel: discharge.
discharge_ax.plot(dfm.index, dfm["D"], color='red', label="D")
discharge_ax.set_title("Ice Discharge rate")
discharge_ax.set_ylabel('Ice Discharge')
discharge_ax.legend()

# Middle panel: total and surface mass balance on the same axes.
for column in ("MB", "SMB"):
    balance_ax.plot(dfm.index, dfm[column], label=column)
balance_ax.set_ylabel('Mass Balance')
balance_ax.legend()

# Bottom panel: basal mass balance. This is the last subplot created, so it is
# also the axes that the pyplot-level xticks call below acts on.
basal_ax.plot(dfm.index, dfm["BMB"], label="BMB")
basal_ax.set_xlabel('Year')
basal_ax.legend()

plt.xticks(np.arange(1842, 2023, 10), rotation=90)
plt.show()


# Scatter plots of pairs of yearly means, each with a least-squares straight
# line (degree-1 np.polyfit) drawn in red.
bold = {"fontsize": 'large', "fontweight": 'bold'}
# (x column, y column, panel title, x-axis label, y-axis label)
scatter_panels = [
    ("SMB", "MB", "SMB and MB Correlation", "Surface MB", "Mass Balance (MB)"),
    ("D", "MB", "MB and D Correlation", "Discharge (D)", "Mass Balance (MB)"),
    ("D", "SMB", "SMB and D Correlation", "Discharge (D)", "Surface MB"),
    ("D", "BMB", "BMB and D Correlation", "Discharge (D)", "Basal MB"),
]

fig,axc = plt.subplots(2,2,figsize=(12,12))
fig.suptitle("Correlation between features", **bold)
fig.tight_layout(pad=5.0)

# axc.flat walks the 2x2 grid row by row, matching the panel order above.
for axis, (x_col, y_col, title, x_label, y_label) in zip(axc.flat, scatter_panels):
    # Slope m and intercept b of the fitted line y = m*x + b.
    m, b = np.polyfit(dfm[x_col], dfm[y_col], 1)
    axis.set_title(title, **bold)
    axis.set_xlabel(x_label, **bold)
    axis.set_ylabel(y_label, **bold)
    axis.plot(dfm[x_col], m*dfm[x_col]+b, color='red')
    axis.scatter(dfm[x_col], dfm[y_col])

plt.show()

	time	MB	MB_err	SMB	SMB_err	D	D_err	BMB	BMB_err
0	1840-01-01	0.267185	0.373477	1.407972	0.302918	1.079574	0.215092	0.061212	0.038220
1	1841-01-01	0.300225	0.357615	1.438253	0.284199	1.079574	0.213832	0.058454	0.037356
2	1842-01-01	-0.284109	0.357375	0.857085	0.281043	1.079574	0.217393	0.061620	0.038363
3	1843-01-01	-0.320739	0.372270	0.822635	0.301422	1.079574	0.214927	0.063799	0.039190
4	1844-01-01	-0.473110	0.369937	0.670860	0.298957	1.079574	0.214297	0.064395	0.039434

Greenland Ice Glacier Analysis - Data Visualization Project¶

Introduction¶

Importing packages and loading data¶

Data Wrangling¶

Outlier Analysis¶

Data visualization¶

How the data is distributed¶

How the mass balance changed over time¶

How mass balance and discharge trended across the years¶

Correlation between all of them¶