地下水用量的估计¶
我们的实验能够分出,地表水和地下水的变化趋势,是否在存在制度变化后存在上升/下降。
In [1]:
Copied!
%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(context="notebook", style="whitegrid")
import pandas as pd
import numpy as np
from hydra import compose, initialize
import os
# 加载项目层面的配置
with initialize(version_base=None, config_path="../../config"):
cfg = compose(config_name="config")
os.chdir(cfg.root)
%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(context="notebook", style="whitegrid")
import pandas as pd
import numpy as np
from hydra import compose, initialize
import os
# 加载项目层面的配置
with initialize(version_base=None, config_path="../../config"):
cfg = compose(config_name="config")
os.chdir(cfg.root)
In [2]:
Copied!
sns.set_theme(style="ticks")
colors = {
"precipitation": "#AAD9BB",
"norm_withdraw": "#92C7CF",
"over_withdraw": "#D24545",
"ground_water": "#F9EFDB",
}
palette = {True: "#B2A59B", False: "#3468C0"}
sns.set_theme(style="ticks")
colors = {
"precipitation": "#AAD9BB",
"norm_withdraw": "#92C7CF",
"over_withdraw": "#D24545",
"ground_water": "#F9EFDB",
}
palette = {True: "#B2A59B", False: "#3468C0"}
准备数据¶
我们在最新的重复了5次运行的标准实验中:
- exp_id = 0 代表没有98-UBR的假设情景
- exp_id = 1 代表按照实际制度的模拟结果
其它的试验参数都保持一致,每个实验情景下的重复编号为:
In [3]:
Copied!
# 每年的各种用水来源
from src.analysis import ExpAnalysis
# 读取实验
exp = ExpAnalysis("new_standard")
# 展示编号
exp.run_ids
# 每年的各种用水来源
from src.analysis import ExpAnalysis
# 读取实验
exp = ExpAnalysis("new_standard")
# 展示编号
exp.run_ids
Out[3]:
run_id 0 (197, 198, 199, 200, 201) 1 (202, 203, 204, 205, 206) Name: id, dtype: object
给定某些 run_id,应该能自动绘制这些实验结果的平均年总用水量
In [4]:
Copied!
data_scenario = exp.get_data(exp_id=0)
data_actual = exp.get_data(exp_id=1)
data_scenario["scenario"] = True
data_actual["scenario"] = False
data = pd.concat([data_actual, data_scenario])
# 只选取1987年之后的数据
data = data[data["year"] >= 1987]
# 标记所有 98-UBR 之后的数据
data["after"] = data.apply(lambda row: True if row["year"] > 1998 else False, axis=1)
# 单独做一个1998年后的数据
data_98 = data[data["year"] > 1998]
data.head()
data_scenario = exp.get_data(exp_id=0)
data_actual = exp.get_data(exp_id=1)
data_scenario["scenario"] = True
data_actual["scenario"] = False
data = pd.concat([data_actual, data_scenario])
# 只选取1987年之后的数据
data = data[data["year"] >= 1987]
# 标记所有 98-UBR 之后的数据
data["after"] = data.apply(lambda row: True if row["year"] > 1998 else False, axis=1)
# 单独做一个1998年后的数据
data_98 = data[data["year"] > 1998]
data.head()
Out[4]:
| id | run_id | farmer_id | city | year | month | crop | province | quota_min | quota_max | precipitation | surface_water | ground_water | deficits | demands | kc | boldness | vengefulness | scenario | after | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 13968 | 612409 | 202 | 176 | 235 | 1987 | 1 | Maize | Shandong | 1,060,320.00 | 1,060,320.00 | 1,357,890.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | 1.00 | False | False |
| 13969 | 612410 | 202 | 139 | 232 | 1987 | 1 | Wheat | Shandong | 669,679.00 | 669,679.00 | 905,493.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.21 | 1.00 | False | False |
| 13970 | 612411 | 202 | 161 | 100 | 1987 | 1 | Wheat | Henan | 4,235,600.00 | 4,235,600.00 | 1,131,690.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.88 | 0.76 | False | False |
| 13971 | 612412 | 202 | 110 | 226 | 1987 | 1 | Wheat | Qinghai | 5,563,320.00 | 5,563,320.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.59 | 0.60 | False | False |
| 13972 | 612413 | 202 | 315 | 265 | 1987 | 1 | Maize | Shaanxi | 11,107,300.00 | 11,107,300.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.05 | 0.32 | False | False |
地表水的变化趋势对比¶
In [5]:
Copied!
# 绘制地表水的变化趋势
from src.analysis.plots import with_axes
from matplotlib.axes import Axes
@with_axes(figsize=(4, 3))
def plot_surface_water_dynamic(ax: Axes | None = None):
# 分组到每个实验、每年、对比不同情景
tmp_data = (
data.groupby(["run_id", "year", "scenario"])["surface_water"]
.sum()
.reset_index()
)
ax = sns.lineplot(
x="year",
y="surface_water",
data=tmp_data,
ax=ax,
hue="scenario",
palette=palette,
)
# 修饰图片
ax.set_xticks(np.arange(1988, 2013, 4))
ax.set_yticks(np.arange(0.8e10, 1.8e10, 0.2e10))
ax.set_xlim(1988, 2012)
ax.set_xlabel("Year")
ax.set_ylabel("Surface water ($m^3$)")
sns.despine(ax=ax, offset=10, trim=True)
ax.axvline(x=1998, ls=":", color=colors["over_withdraw"], label="1998")
ax.legend_.remove()
return ax
plot_surface_water_dynamic()
# 绘制地表水的变化趋势
from src.analysis.plots import with_axes
from matplotlib.axes import Axes
@with_axes(figsize=(4, 3))
def plot_surface_water_dynamic(ax: Axes | None = None):
# 分组到每个实验、每年、对比不同情景
tmp_data = (
data.groupby(["run_id", "year", "scenario"])["surface_water"]
.sum()
.reset_index()
)
ax = sns.lineplot(
x="year",
y="surface_water",
data=tmp_data,
ax=ax,
hue="scenario",
palette=palette,
)
# 修饰图片
ax.set_xticks(np.arange(1988, 2013, 4))
ax.set_yticks(np.arange(0.8e10, 1.8e10, 0.2e10))
ax.set_xlim(1988, 2012)
ax.set_xlabel("Year")
ax.set_ylabel("Surface water ($m^3$)")
sns.despine(ax=ax, offset=10, trim=True)
ax.axvline(x=1998, ls=":", color=colors["over_withdraw"], label="1998")
ax.legend_.remove()
return ax
plot_surface_water_dynamic()
对比地表水在不同时期的差异¶
In [17]:
Copied!
from scipy import stats
@with_axes(figsize=(3, 2))
def compare_water_uses(y_col: str = "surface_water", ax: Axes | None = None) -> Axes:
# 准备数据
tmp_data = (
data.groupby(["run_id", "year", "scenario", "after"])[y_col].sum().reset_index()
)
# 平均值数据
stats_data = (
tmp_data.groupby(["year", "scenario", "after"])[y_col]
.mean()
.reset_index(level=0)
.sort_index()
)
# 统计检验
y1 = stats_data.loc[(False, False), y_col]
y2 = stats_data.loc[(True, False), y_col]
tstat, pval = stats.ttest_rel(y1, y2)
print("Before 1998-UBR, t-stat: {:.2f} pval: {:.4f}".format(tstat, pval))
# 统计检验
y1 = stats_data.loc[(False, True), y_col]
y2 = stats_data.loc[(True, True), y_col]
tstat, pval = stats.ttest_rel(y1, y2)
print("After 1998-UBR, t-stat: {:.2f} pval: {:.4f}".format(tstat, pval))
# 绘图
ax = sns.boxplot(
data=tmp_data,
x="after",
y=y_col,
hue="scenario",
ax=ax,
palette=palette,
)
sns.despine(ax=ax, offset=0, trim=False)
ax.legend_.remove()
ax.set_xlabel("After 1998-UBR")
ax.set_ylabel("Groundwater ($m^3$)")
ax.axvline(x=0.5, ls=":", color=colors["over_withdraw"], label="98-UBR")
return ax
compare_water_uses("ground_water")
from scipy import stats
@with_axes(figsize=(3, 2))
def compare_water_uses(y_col: str = "surface_water", ax: Axes | None = None) -> Axes:
# 准备数据
tmp_data = (
data.groupby(["run_id", "year", "scenario", "after"])[y_col].sum().reset_index()
)
# 平均值数据
stats_data = (
tmp_data.groupby(["year", "scenario", "after"])[y_col]
.mean()
.reset_index(level=0)
.sort_index()
)
# 统计检验
y1 = stats_data.loc[(False, False), y_col]
y2 = stats_data.loc[(True, False), y_col]
tstat, pval = stats.ttest_rel(y1, y2)
print("Before 1998-UBR, t-stat: {:.2f} pval: {:.4f}".format(tstat, pval))
# 统计检验
y1 = stats_data.loc[(False, True), y_col]
y2 = stats_data.loc[(True, True), y_col]
tstat, pval = stats.ttest_rel(y1, y2)
print("After 1998-UBR, t-stat: {:.2f} pval: {:.4f}".format(tstat, pval))
# 绘图
ax = sns.boxplot(
data=tmp_data,
x="after",
y=y_col,
hue="scenario",
ax=ax,
palette=palette,
)
sns.despine(ax=ax, offset=0, trim=False)
ax.legend_.remove()
ax.set_xlabel("After 1998-UBR")
ax.set_ylabel("Groundwater ($m^3$)")
ax.axvline(x=0.5, ls=":", color=colors["over_withdraw"], label="98-UBR")
return ax
compare_water_uses("ground_water")
多取的值就是从地表水少取的值,这样也合理也不合理,我们需要知道没有这次政策,WUI会怎样变化。
对比不同作物的地下水增加值¶
In [7]:
Copied!
tmp_data = (
data_98.groupby(["run_id", "scenario", "year", "crop"])["ground_water"]
.sum()
.reset_index()
)
tmp_data.head()
tmp_data = (
data_98.groupby(["run_id", "scenario", "year", "crop"])["ground_water"]
.sum()
.reset_index()
)
tmp_data.head()
Out[7]:
| run_id | scenario | year | crop | ground_water | |
|---|---|---|---|---|---|
| 0 | 197 | True | 1999 | Maize | 2,271,405,962.10 |
| 1 | 197 | True | 1999 | Rice | 1,407,720,120.00 |
| 2 | 197 | True | 1999 | Wheat | 7,911,493,766.20 |
| 3 | 197 | True | 2000 | Maize | 1,872,647,741.70 |
| 4 | 197 | True | 2000 | Rice | 1,450,891,370.00 |
In [8]:
Copied!
ax = sns.catplot(
data=tmp_data,
kind="box",
x="crop",
y="ground_water",
hue="scenario",
errorbar="sd",
# alpha=.6,
height=6,
color="r",
)
ax.despine(left=True)
ax.set_axis_labels("Month", "Groundwater withdraws ($m^3$)")
# g.legend.set_title("")
ax = sns.catplot(
data=tmp_data,
kind="box",
x="crop",
y="ground_water",
hue="scenario",
errorbar="sd",
# alpha=.6,
height=6,
color="r",
)
ax.despine(left=True)
ax.set_axis_labels("Month", "Groundwater withdraws ($m^3$)")
# g.legend.set_title("")
显著促进了玉米和小麦的地下水替代性开采,水稻受到的影响不大。
人均作物的关系¶
In [9]:
Copied!
total = data_98.groupby(["run_id", "crop", "year", "scenario"])["ground_water"].sum()
n_farmers = data_98.groupby(["run_id", "crop", "year", "scenario"])[
"ground_water"
].count()
tmp_data = (total / n_farmers).reset_index()
tmp_data.head()
total = data_98.groupby(["run_id", "crop", "year", "scenario"])["ground_water"].sum()
n_farmers = data_98.groupby(["run_id", "crop", "year", "scenario"])[
"ground_water"
].count()
tmp_data = (total / n_farmers).reset_index()
tmp_data.head()
Out[9]:
| run_id | crop | year | scenario | ground_water | |
|---|---|---|---|---|---|
| 0 | 197 | Maize | 1999 | True | 1,371,621.96 |
| 1 | 197 | Maize | 2000 | True | 1,248,431.83 |
| 2 | 197 | Maize | 2001 | True | 1,872,546.69 |
| 3 | 197 | Maize | 2002 | True | 1,753,643.92 |
| 4 | 197 | Maize | 2003 | True | 1,285,264.03 |
In [10]:
Copied!
ax = sns.catplot(
data=tmp_data,
kind="box",
x="crop",
y="ground_water",
hue="scenario",
errorbar="sd",
# alpha=.6,
height=6,
color="r",
)
ax.despine(left=True)
ax.set_axis_labels("Month", "Groundwater withdraws ($m^3$)")
# g.legend.set_title("")
ax = sns.catplot(
data=tmp_data,
kind="box",
x="crop",
y="ground_water",
hue="scenario",
errorbar="sd",
# alpha=.6,
height=6,
color="r",
)
ax.despine(left=True)
ax.set_axis_labels("Month", "Groundwater withdraws ($m^3$)")
# g.legend.set_title("")
分省分析地下水量有没有下降¶
In [21]:
Copied!
@with_axes(figsize=(5, 3))
def plot_provincial_groundwater_comparison(ax: Axes | None = None) -> Axes:
# 准备数据,只用98年以后的数据
tmp_data = (
data_98.groupby(["run_id", "province", "year", "scenario"])["ground_water"]
.sum()
.reset_index()
).sort_values("ground_water")
# 画图
ax = sns.boxplot(
data=tmp_data,
# kind="box",
y="province",
x="ground_water",
hue="scenario",
orient="h",
color="r",
ax=ax,
palette=palette,
)
# 修饰图片
ax.set_xticks(np.arange(0, 4.5, 0.5) * 1e9)
ax.set_ylabel("Province")
ax.set_xlabel("Groundwater withdraws ($m^3$)")
sns.despine(ax=ax, offset=10, trim=True)
# ax.legend(loc='lower left', title='Scenario')
ax.legend_.remove()
return ax
plot_provincial_groundwater_comparison()
@with_axes(figsize=(5, 3))
def plot_provincial_groundwater_comparison(ax: Axes | None = None) -> Axes:
# 准备数据,只用98年以后的数据
tmp_data = (
data_98.groupby(["run_id", "province", "year", "scenario"])["ground_water"]
.sum()
.reset_index()
).sort_values("ground_water")
# 画图
ax = sns.boxplot(
data=tmp_data,
# kind="box",
y="province",
x="ground_water",
hue="scenario",
orient="h",
color="r",
ax=ax,
palette=palette,
)
# 修饰图片
ax.set_xticks(np.arange(0, 4.5, 0.5) * 1e9)
ax.set_ylabel("Province")
ax.set_xlabel("Groundwater withdraws ($m^3$)")
sns.despine(ax=ax, offset=10, trim=True)
# ax.legend(loc='lower left', title='Scenario')
ax.legend_.remove()
return ax
plot_provincial_groundwater_comparison()
上述结果表明,基本上所有的省在的情况下,都存在地下水超采的情况。但一些省份,如山东、内蒙、宁夏、河南,就特别明显。
这些都是用水量多的大省,并且在经常违背地表水配额制度的省份。
整合作图¶
In [23]:
Copied!
fig = plt.figure(figsize=(7, 5), constrained_layout=True)
gs = fig.add_gridspec(ncols=2, nrows=2, width_ratios=[1.2, 1], hspace=0.0)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])
ax3 = fig.add_subplot(gs[1, :])
plot_surface_water_dynamic(ax=ax1)
compare_water_uses("ground_water", ax=ax2)
plot_provincial_groundwater_comparison(ax=ax3)
handles, labels = ax1.get_legend_handles_labels()
handles_2, labels_2 = ax3.get_legend_handles_labels()
fig.legend(
[*handles, *handles_2],
[*labels, *labels_2],
# ['Simulation', 'Scenario', '98-UBR', 'Simulation', 'Scenario'],
loc="center right",
ncol=2,
title="No 98-UBR Scenario",
bbox_to_anchor=(1, 0.41),
)
plt.show();
fig = plt.figure(figsize=(7, 5), constrained_layout=True)
gs = fig.add_gridspec(ncols=2, nrows=2, width_ratios=[1.2, 1], hspace=0.0)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])
ax3 = fig.add_subplot(gs[1, :])
plot_surface_water_dynamic(ax=ax1)
compare_water_uses("ground_water", ax=ax2)
plot_provincial_groundwater_comparison(ax=ax3)
handles, labels = ax1.get_legend_handles_labels()
handles_2, labels_2 = ax3.get_legend_handles_labels()
fig.legend(
[*handles, *handles_2],
[*labels, *labels_2],
# ['Simulation', 'Scenario', '98-UBR', 'Simulation', 'Scenario'],
loc="center right",
ncol=2,
title="No 98-UBR Scenario",
bbox_to_anchor=(1, 0.41),
)
plt.show();
用水效率的提升¶
In [13]:
Copied!
## 绘制每个省份的用水强度变化
from src.analysis import WaterAnalysis
## 绘制每个省份的用水强度变化
from src.analysis import WaterAnalysis
In [14]:
Copied!
bps = []
for algorithm in ["Dynp", "BottomUp"]:
bps.append(wa.batch_detect_breakpoints(algorithm=algorithm).rename(algorithm))
data = pd.concat(bps, axis=1)
fig, ax = plt.subplots(figsize=(3, 2.5))
ax.set_xlim(1983, 2012)
sns.kdeplot(data["Dynp"], ax=ax, color="red", fill=True, label="Break Points")
## fig2
wa = WaterAnalysis()
ax2 = ax.twinx()
sns.lineplot(x="Year", y="Maize", data=wa.wui, ax=ax2, label="Maize")
sns.lineplot(x="Year", y="Rice", data=wa.wui, ax=ax2, label="Rice")
sns.lineplot(x="Year", y="Wheat", data=wa.wui, ax=ax2, label="Wheat")
ax.set_ylabel("Trend break point")
ax2.set_ylabel("Water use intensity ($mm/km^2$)")
ax.set_xlabel("Year")
plt.show();
bps = []
for algorithm in ["Dynp", "BottomUp"]:
bps.append(wa.batch_detect_breakpoints(algorithm=algorithm).rename(algorithm))
data = pd.concat(bps, axis=1)
fig, ax = plt.subplots(figsize=(3, 2.5))
ax.set_xlim(1983, 2012)
sns.kdeplot(data["Dynp"], ax=ax, color="red", fill=True, label="Break Points")
## fig2
wa = WaterAnalysis()
ax2 = ax.twinx()
sns.lineplot(x="Year", y="Maize", data=wa.wui, ax=ax2, label="Maize")
sns.lineplot(x="Year", y="Rice", data=wa.wui, ax=ax2, label="Rice")
sns.lineplot(x="Year", y="Wheat", data=wa.wui, ax=ax2, label="Wheat")
ax.set_ylabel("Trend break point")
ax2.set_ylabel("Water use intensity ($mm/km^2$)")
ax.set_xlabel("Year")
plt.show();
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[14], line 3 1 bps = [] 2 for algorithm in ["Dynp", "BottomUp"]: ----> 3 bps.append(wa.batch_detect_breakpoints(algorithm=algorithm).rename(algorithm)) 4 data = pd.concat(bps, axis=1) 6 fig, ax = plt.subplots(figsize=(3, 2.5)) NameError: name 'wa' is not defined
地下水超用对地表水下降的贡献率¶
1998 年后,引黄河地表水的下降可能有两个原因:
- 因为地下水替代了地表水
- 因为用水效率上升
我们可以通过地表水下降量中,减去地下水的上升量,得到用水效率提升带来的净下降量,从而区分两者的贡献率。
例如,对于某个空间范围内的地表用水量数据 $SW$,我们可以计算用水效率提升的贡献率 $\delta$:
$$ \delta = \frac{\Delta SW_{scenario}}{\Delta SW_{actual}} $$
In [ ]:
Copied!
def select_data_range(data, yr_min, yr_max):
return data[(data["year"] >= yr_min) & (data["year"] <= yr_max)]
def select_data_range(data, yr_min, yr_max):
return data[(data["year"] >= yr_min) & (data["year"] <= yr_max)]
In [ ]:
Copied!
sw_actual_0 = select_data_range(data_actual, 1995, 1998)["surface_water"].mean()
sw_scenario_0 = select_data_range(data_scenario, 1995, 1998)["surface_water"].mean()
sw_actual_1 = select_data_range(data_actual, 2009, 2011)["surface_water"].mean()
sw_scenario_1 = select_data_range(data_scenario, 2009, 2011)["surface_water"].mean()
delta_scenario = sw_scenario_1 - sw_scenario_0
delta_actual = sw_actual_1 - sw_actual_0
delta = delta_scenario / delta_actual
f"用水效率提升对黄河地表用水下降贡献了{round(delta * 100, 2)}。"
sw_actual_0 = select_data_range(data_actual, 1995, 1998)["surface_water"].mean()
sw_scenario_0 = select_data_range(data_scenario, 1995, 1998)["surface_water"].mean()
sw_actual_1 = select_data_range(data_actual, 2009, 2011)["surface_water"].mean()
sw_scenario_1 = select_data_range(data_scenario, 2009, 2011)["surface_water"].mean()
delta_scenario = sw_scenario_1 - sw_scenario_0
delta_actual = sw_actual_1 - sw_actual_0
delta = delta_scenario / delta_actual
f"用水效率提升对黄河地表用水下降贡献了{round(delta * 100, 2)}。"
Out[ ]:
'用水效率提升对黄河地表用水下降贡献了60.32。'
研究结果表明:
- 98 年的制度改革,在实际上是促进了地下水的开采。每年开采平均比实际的情景多了 xxx。
- 其中 xx 省,xx省
- 但 WUI 的下降对用水量减少的贡献更多,1998年至2012年,节水改造对黄河引水量下降的贡献率是65%,超过地下水的替代性开采。
- 大多数城市的用水强度转折都发生在 2000 年前后