import pandas as pd
from datetime import datetime as dt, timedelta
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter # 時間軸のフォーマットを自在に
import numpy as np
import os
def get_segmented_rssi(day, point):
    """Build a whole-day RSSI histogram (5-dB bins) for one sensor point.

    Reads the daily packet CSV for *point*/*day* and returns a DataFrame
    with columns "rssi" (bin label as int), *day* (packet count per bin)
    and "rate<day>" (count as a fraction of the daily total).
    Raises FileNotFoundError when the daily CSV does not exist.
    """
    bin_edges = list(range(-100, -30, 5))   # -100 .. -35 dB bin edges
    bin_labels = list(range(-100, -35, 5))  # label = lower edge of each bin
    filename = "/home/toyotamngr/csv/toyota/points/%s/daily/%s_mon.csv" % (point, day)
    packets = pd.read_csv(filename, sep=",",
                          names=["unix_time", "mac", "freq", "rssi", "mac_resolv", "glbit"])
    bin_counts = pd.cut(packets.rssi, bin_edges, labels=bin_labels).value_counts()
    hist = (pd.DataFrame(bin_counts)
            .reset_index()
            .sort_values("index")
            .rename(columns={"index": "rssi", "rssi": day}))
    hist["rate" + day] = hist[day] / hist[day].sum()
    return hist.astype({"rssi": int})
def get_hourly_segmented_rssi(day, point):
    """Hourly RSSI histograms (5-dB bins) for one sensor point and day.

    Returns a DataFrame whose "rssi" column holds the bin labels (int)
    and whose columns "0".."23" hold the packet counts for each hour.
    Raises FileNotFoundError when the daily CSV does not exist.
    """
    cut_range = list(range(-100, -30, 5))   # bin edges: -100 .. -35 dB
    labels = list(range(-100, -35, 5))      # bin label = lower edge
    filename = "/home/toyotamngr/csv/toyota/points/%s/daily/%s_mon.csv" % (point, day)
    toyota_df = pd.read_csv(filename, sep=",",
                            names=["unix_time", "mac", "freq", "rssi", "mac_resolv", "glbit"])
    # Split into hours.
    # BUG FIX: the module imports `datetime as dt`; the bare name `datetime`
    # is not defined when this function first runs, so the original raised
    # NameError here.
    toyota_df["datetime"] = toyota_df.unix_time.apply(lambda x: dt.fromtimestamp(x))
    toyota_df["hour"] = toyota_df['datetime'].dt.hour
    freq_df = pd.DataFrame({"rssi": labels})
    for i in range(24):
        temp_df = toyota_df[toyota_df['hour'] == i]
        # value_counts on the categorical cut yields every bin (zeros included)
        temp_df = pd.DataFrame(pd.cut(temp_df.rssi, cut_range, labels=labels)
                               .value_counts()).reset_index().sort_values("index")
        temp_df = temp_df.rename(columns={"index": "rssi", "rssi": str(i)})
        freq_df = pd.merge(freq_df, temp_df, on="rssi")
    freq_df = freq_df.astype({"rssi": int})
    return freq_df
def get_rssi_ave_med(day, point):
    """Return {"average": ..., "median": ...} of RSSI for one point and day.

    A missing daily file yields NaN for both statistics instead of raising.
    """
    filename = "/home/toyotamngr/csv/toyota/points/%s/daily/%s_mon.csv" % (point, day)
    if not os.path.isfile(filename):
        return {"average": np.nan, "median": np.nan}
    rssi = pd.read_csv(filename, sep=",",
                       names=["unix_time", "mac", "freq", "rssi", "mac_resolv", "glbit"])["rssi"]
    return {"average": rssi.mean(), "median": rssi.median()}
def get_hourly_rssi_ave_med(day, point, glbit=1):
    """Per-hour mean RSSI and packet count for one point and day.

    glbit=1 keeps only random (locally administered) addresses; any other
    value keeps all packets.  Returns a DataFrame indexed by hour with the
    columns ["rssi", "size"], or {"average": nan, "median": nan} when the
    daily file is missing (kept for backward compatibility with callers).
    """
    filename = "/home/toyotamngr/csv/toyota/points/%s/daily/%s_mon.csv" % (point, day)
    if not os.path.isfile(filename):
        return {"average": np.nan, "median": np.nan}
    toyota_df = pd.read_csv(filename, sep=",",
                            names=["unix_time", "mac", "freq", "rssi", "mac_resolv", "glbit"])
    if glbit == 1:
        toyota_df = toyota_df[toyota_df["glbit"] == 1]
    # Bucket packets by hour of day.
    toyota_df["datetime"] = toyota_df.unix_time.apply(lambda x: dt.fromtimestamp(x))
    toyota_df["hour"] = toyota_df['datetime'].dt.hour
    grouped = toyota_df.groupby("hour")
    # BUG FIX: the original called .mean() on the whole frame, which fails on
    # the non-numeric "mac" column in modern pandas, and selected the result
    # columns with a set literal ({"rssi", "size"}), which is rejected /
    # order-unstable; average only the rssi column and use an explicit list.
    rssi_mean = grouped[["rssi"]].mean()
    rssi_mean["size"] = grouped.size()
    return rssi_mean[["rssi", "size"]]
# Computing this every time is slow; the results should really be cached
# in a file somewhere.
result = {}
sday = "20191001"   # first day of the scan range (inclusive)
eday = "20191231"   # last day of the scan range (inclusive)
day = sday
# Collect the daily RSSI average/median for point TTRI17 over Oct-Dec 2019.
while day <= eday:
    result[day] = get_rssi_ave_med(day, "TTRI17")
    # advance one calendar day, keeping the YYYYMMDD string format
    day = dt.strftime(dt.strptime(day, "%Y%m%d") + timedelta(days=1), "%Y%m%d")
rssi_ave_med_df = pd.DataFrame(result)
rssi_ave_med_df = rssi_ave_med_df.T   # rows = days, columns = average/median
fig = plt.figure(figsize=(15,4))
ax = fig.add_subplot(1, 1, 1)
rssi_ave_med_df.plot(ax=ax)
# Show the 20 days with the highest average RSSI (notebook-style display).
rssi_ave_med_df.sort_values("average", ascending = False).head(20)
day1 = "20191005"
query1 = get_segmented_rssi(day1, "TTRI17")
day2 = "20191006"
query2 = get_segmented_rssi(day2, "TTRI17")
# Side-by-side histograms of the two days, joined on the RSSI bin label.
toyota_all = pd.merge(query1, query2, on="rssi")
toyota_all = toyota_all.astype({"rssi": int})
toyota_all
# Plot RSSI distributions for several points side by side.
def plot_rssi_distribution(points, days, fig_size=(20,16), col_num_ini=3, rate=False, svg=False):
    """Draw one RSSI-histogram subplot per point, one curve per day.

    points      : list of sensor point names
    days        : list of "YYYYMMDD" strings; every day is drawn in each subplot
    fig_size    : overall matplotlib figure size
    col_num_ini : maximum number of subplot columns
    rate        : plot the "rate<day>" fraction columns instead of raw counts
    svg         : additionally save the figure as SVG and convert it to EMF
    Returns True.
    """
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=fig_size)
    # Grid layout: up to col_num_ini columns, as many rows as needed.
    if len(points) < col_num_ini:
        col_num = len(points)
        row_num = 1
    else:
        col_num = col_num_ini
        row_num = int(len(points) / col_num_ini) + 1
    if rate:  # plot fractions or raw packet counts
        col_pre = "rate"
    else:
        col_pre = ""
    for fig_num, point in enumerate(points):
        ax = fig.add_subplot(row_num, col_num, fig_num+1)
        result = get_segmented_rssi(days[0], point)
        result.plot(x="rssi", y=col_pre + days[0], ax=ax,
                    title="rssi強度分布(" + point + ")")
        for i in range(1, len(days)):
            result = get_segmented_rssi(days[i], point)
            result.plot(x="rssi", y=col_pre + days[i], ax=ax)
    if svg:  # save as SVG, then convert to EMF with inkscape
        outfile_body = "rssi_distrib"
        # BUG FIX: the original call had a stray second comma
        # (plt.savefig(..., , bbox_inches=...)), which is a SyntaxError.
        plt.savefig(outfile_body + ".svg", bbox_inches="tight")
        import subprocess
        subprocess.run("inkscape --file " + outfile_body + ".svg"
                       + " --export-emf " + outfile_body + ".emf", shell=True)
    return True
# Raw packet counts for several points.
points = ["TTRI01", "TTRI03", "TTRI13", "TTRI17", "TTRI18", "TTRI20"]
days = ["20191005", "20191006", "20191207", "20191208"]
plot_rssi_distribution(points, days, rate=False)
# Same plots, shown as fractions of each day's total.
points = ["TTRI01", "TTRI03", "TTRI13", "TTRI17", "TTRI18", "TTRI20"]
days = ["20191005", "20191006", "20191207", "20191208"]
plot_rssi_distribution(points, days, rate=True)
# Hourly (column-wise) RSSI bin counts for one point/day.
dt1005 = get_hourly_segmented_rssi("20191005", "TTRI17")
dt1005 = dt1005.set_index("rssi")
dt1005
# NOTE: this imports the bare name `datetime` in addition to the module's
# earlier `datetime as dt` alias.
from datetime import datetime
def plot_rssi_packets(points, days, title_str="", fig_size=(20,14), col_num_ini = 3, svg = False):
    '''
    Scatter plots of hourly mean RSSI vs. hourly packet count: one subplot
    per point, one color per day.  Hours are the merge key, so every day's
    hourly averages line up row-wise.

    points      : list of sensor point names
    days        : list of "YYYYMMDD" strings (days[0] is the merge base)
    title_str   : prefix for each subplot title
    col_num_ini : maximum number of subplot columns
    svg         : additionally save the figure as SVG and convert it to EMF
    A point with no data for one of the days makes the merge fail (as in
    the original).  Returns 0.
    '''
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=fig_size)
    # Grid layout: up to col_num_ini columns, as many rows as needed.
    if len(points) < col_num_ini:
        col_num = len(points)
        row_num = 1
    else:
        col_num = col_num_ini
        row_num = int(len(points) / col_num_ini) + 1
    colorlist = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00', '#ffff33', '#a65628', '#f781bf']
    for fig_num, point in enumerate(points):
        ax = fig.add_subplot(row_num, col_num, fig_num+1)
        # data-preparation helper defined earlier in this file
        hourly_data = (get_hourly_rssi_ave_med(days[0], point, glbit=0)
                       .rename(columns={"rssi": "rssi_"+days[0], "size": "size_"+days[0]}))
        hourly_data.plot.scatter(x="rssi_" + days[0], y="size_"+ days[0], label=days[0],
                                 color=colorlist[0],
                                 ax=ax)
        cidx = 1
        for d in days[1:]:
            hourly_data2 = get_hourly_rssi_ave_med(d, point, glbit=0).rename(
                columns={"rssi": "rssi_"+d, "size": "size_"+d})
            hourly_data = pd.merge(hourly_data, hourly_data2, on="hour")
            hourly_data.plot.scatter(x="rssi_" + d, y="size_"+ d,
                                     color=colorlist[cidx],
                                     label=d,
                                     ax=ax)
            cidx += 1
        ax.set_title(title_str + "(" + point + ")")
        ax.set_xlabel("rssi強度(db)の平均")
        ax.set_ylabel("パケット数")
    if svg:  # save as SVG, then convert to EMF with inkscape
        outfile_body = "rssi_packets"
        # BUG FIX: the original call had a stray second comma
        # (plt.savefig(..., , bbox_inches=...)), which is a SyntaxError.
        plt.savefig(outfile_body + ".svg", bbox_inches="tight")
        import subprocess
        subprocess.run("inkscape --file " + outfile_body + ".svg"
                       + " --export-emf " + outfile_body + ".emf", shell=True)
    return 0
# Slow when many points are requested.
points = ["TTRI01", "TTRI03", "TTRI13", "TTRI17", "TTRI18", "TTRI20"]
days = ["20191005", "20191006", "20191007", "20191123", "20191207", "20191208", "20191209"]
title_str = "時間ごとのrssi平均とパケット数との関係"
plot_rssi_packets(points, days, title_str, fig_size=(20,16))
#hourly_data
# A single point is quick.
point = ["TTRI20"]
days = ["20191005", "20191006", "20191007", "20191123", "20191207", "20191208", "20191209"]
title_str = "時間ごとのrssi平均とパケット数との関係"
plot_rssi_packets(point, days, title_str, fig_size= (6,6))
使う関数を記したスクリプトは同じディレクトリにplotFunctions.pyという名前で作っておく。(末尾に載せておく)
%reload_ext autoreload
from plotFunctions import *
plotPointsHourlyData(["TTRI01"], "20191005", area="ttri" )
# ある期間のアドレス数(hourlyCounts)のデータを取得して描画 (plotFunctionsで定義した関数)
plotDailyData(["TTRI17"], "20191001", "20191030", area="ttri", figsize=[12,4])
plotDailyData(["TTRI17"], "20191101", "20191130", area="ttri", figsize=[12,4])
plotDailyData(["TTRI17"], "20191201", "20191231", area="ttri", figsize=[12,4])
# Fetch helper: one day's hourly address counts for several points.
def getPointsHourlyData(day, points, addr='a', area='kofu', figsize=[6, 4]):
    '''
    Fetch the hourly address counts for *day* from the counts API and return
    them as a DataFrame with one column per requested point.  Points absent
    from the server response are silently skipped.
    addr is the device/address class ('a' = all; on ff/hakushu: local+global).
    '''
    import pandas as pd
    import requests
    json_root = "https://8tops.yamanashi.ac.jp/"
    url = (json_root + area + "/"
           + "getHourlyCounts.php"
           + "?sdate=" + day
           + "&edate=" + day
           + "&addr=" + addr)
    json_dict = requests.get(url).json()
    # The server returns the counts as strings; convert them to float.
    plot_data = {}
    for pt in points:
        if pt not in json_dict:
            continue
        plot_data[pt] = [float(v) for v in json_dict[pt][day]['hourly']]
    return pd.DataFrame(plot_data)
# Smoke test of the fetch helper (notebook-style display of the head).
getPointsHourlyData("20191130", ["TTRI01", "TTRI20"], area="ttri").head()
以下の内容を、地点(point)ごとに描画する
import pandas as pd
def plot_rssi_dwellNum(points, days, title_str="", fig_size=(20,14), col_num_ini = 3, svg=False):
    '''
    Scatter plots of hourly visitor (address) count vs. hourly mean RSSI,
    one subplot per point, one color per day (multiple-day data).

    points      : list of sensor point names
    days        : list of "YYYYMMDD" strings (days[0] is the merge base)
    title_str   : prefix for each subplot title
    col_num_ini : maximum number of subplot columns
    svg         : additionally save the figure as SVG and convert it to EMF
    Returns 0.
    '''
    import matplotlib.pyplot as plt
    colorlist = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00', '#ffff33', '#a65628', '#f781bf']
    # figure grid layout: up to col_num_ini columns, as many rows as needed
    fig = plt.figure(figsize=fig_size)
    if len(points) < col_num_ini:
        col_num = len(points)
        row_num = 1
    else:
        col_num = col_num_ini
        row_num = int(len(points) / col_num_ini) + 1
    for fig_num, point in enumerate(points):
        ax = fig.add_subplot(row_num, col_num, fig_num+1)
        # data-preparation helpers defined earlier in this file
        hourly_data = (get_hourly_rssi_ave_med(days[0], point, glbit=0)
                       .rename(columns={"rssi": "rssi_"+days[0],
                                        "size": "size_"+days[0]}))
        # BUG FIX: the original passed the leftover global `day` (set by an
        # unrelated loop near the top of the file) instead of days[0], so the
        # first day's RSSI was paired with a different day's dwell counts.
        hourlyDwellNum = (getPointsHourlyData(days[0], [point], area="ttri")
                          .reset_index().rename(columns={"index": "hour",
                                                         point: "dwellNum_"+days[0]}))
        hourly_data = pd.merge(hourly_data, hourlyDwellNum, on="hour")
        hourly_data.plot.scatter(x="rssi_" + days[0], y="dwellNum_"+ days[0], label=days[0],
                                 color=colorlist[0], ax=ax)
        cidx = 1
        for d in days[1:]:
            hourly_data2 = (get_hourly_rssi_ave_med(d, point, glbit=0)
                            .rename(columns={"rssi": "rssi_"+d,
                                             "size": "size_"+d}))
            hourlyDwellNum = (getPointsHourlyData(d, [point], area="ttri")
                              .reset_index().rename(columns={"index": "hour", point: "dwellNum_" +d}))
            hourly_data = pd.merge(hourly_data, hourlyDwellNum, on="hour")
            hourly_data = pd.merge(hourly_data, hourly_data2, on="hour")
            hourly_data.plot.scatter(x="rssi_" + d, y="dwellNum_"+ d, ax=ax,
                                     color=colorlist[cidx],
                                     label=d)
            cidx += 1
        ax.set_title(title_str + "(" + point + ")")
        ax.set_xlabel("rssi強度(db)の平均")
        ax.set_ylabel("Wi-Fiアドレス数")
    if svg:  # save as SVG, then convert to EMF with inkscape
        outfile_body = "rssi_dwellNum"
        # BUG FIX: the original call had a stray second comma
        # (plt.savefig(..., , bbox_inches=...)), which is a SyntaxError.
        plt.savefig(outfile_body + ".svg", bbox_inches="tight")
        import subprocess
        subprocess.run("inkscape --file " + outfile_body + ".svg"
                       + " --export-emf " + outfile_body + ".emf", shell=True)
    return 0
days = ["20191005", "20191006", "20191007", "20191123", "20191207", "20191208", "20191209"]
# points = ["TTRI01", "TTRI03"]
points = ["TTRI01", "TTRI03", "TTRI13", "TTRI17", "TTRI18", "TTRI20"]
plot_rssi_dwellNum(points, days, "滞在者数 vs. 平均rssi強度", fig_size=(20,16))
plotFunctions.pyの内容
# --- plotFunctions.py: shared setup for the plotting helpers below ---
json_root = "https://8tops.yamanashi.ac.jp/"   # base URL of the counts API
import json
import requests
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'IPAPGothic'  # set the global (Japanese-capable) font
import numpy as np
from dateutil.relativedelta import relativedelta
import os
import csv
def plotPointsHourlyData(points, day, addr='a', area='kofu', figsize=[6, 4]):
    '''
    Plot the hourly address counts of *points* for a single day.
    addr is the device/address class ('a' = all; on ff/hakushu: local+global).
    figsize : matplotlib figure size (width, height).
    Points absent from the server response are silently skipped; point names
    are replaced by their short names from the area attribute table.
    '''
    resp = requests.get(json_root + area + "/"
                        + "getHourlyCounts.php"
                        + "?sdate=" + day
                        + "&edate=" + day
                        + "&addr=" + addr)
    json_dict = resp.json()
    plot_data = {}
    # Build the plot DataFrame; the server returns counts as strings, so
    # they must be converted to numbers.
    pointAttrib = getPointAttribute(area)
    for pt in points:
        if not pt in json_dict.keys():
            continue
        plot_data[pointAttrib[pt]['短縮名']] = list(
            map(float, json_dict[pt][day]['hourly']))
    df4plot = pd.DataFrame(plot_data)
    # Plot.
    # BUG FIX: the figure size was hard-coded to (6, 4) even though the
    # function takes a figsize parameter; honor it (default unchanged).
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(1, 1, 1)
    ax = df4plot.plot(ax=ax, fontsize=12)
    # Pin the legend outside the axes.
    ax.legend(fontsize=12, loc='center left',
              bbox_to_anchor=(1.05, 0.5), borderaxespad=0)
    ax.set_title("地点アドレス数 (" + day + ")",
                 fontsize=12)
    ax.set_xlabel("時間", fontsize=12)
    ax.set_ylabel("アドレス数", fontsize=12)
    plt.show()
def plotDailyData(points, sday, eday, addr='a', area='kofu', out="",
                  figsize=[6, 4]):
    """
    Plot the daily total address counts for the given points in *area*.
    points : plain list of point names
    out="file" saves the figure as <area><sday>.svg and converts it to EMF
    via inkscape; any other value shows it interactively.
    """
    url = (json_root + area + "/"
           + "getHourlyCounts.php"
           + "?sdate=" + sday
           + "&edate=" + eday
           + "&addr=" + addr)
    json_dict = requests.get(url).json()
    pointAttrib = getPointAttribute(area)
    plot_data = {}
    # Sum each day's 24 hourly counts into a daily total, keyed by the
    # point's short name from the attribute table.
    for pt in json_dict:
        if pt not in points:
            continue
        label = pointAttrib[pt]['短縮名']
        daily_totals = {}
        for day in json_dict[pt]:
            total = 0
            for h in json_dict[pt][day]['hourly']:
                total += int(h)
            daily_totals[datetime.strptime(day, "%Y%m%d")] = total
        plot_data[label] = daily_totals
    df4plot = pd.DataFrame(plot_data)
    # Draw the time series.
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(1, 1, 1)
    ax = df4plot.plot(ax=ax, fontsize=12, lw=2)
    # Pin the legend outside the axes.
    ax.legend(fontsize=12, loc='center left',
              bbox_to_anchor=(1.05, 0.5), borderaxespad=0)
    ax.set_title("地点アドレス数 (" + sday + "〜" + eday + ")",
                 fontsize=12)
    ax.set_xlabel("日", fontsize=12)
    ax.set_ylabel("アドレス数", fontsize=12)
    if out == "file":
        outfile_body = area + sday
        plt.savefig(outfile_body + ".svg", bbox_inches="tight")
        import subprocess
        subprocess.run("inkscape --file " + outfile_body + ".svg"
                       + " --export-emf " + outfile_body + ".emf", shell=True)
    else:
        plt.show()
def plotDailyDataByGroup(points, sday, eday, addr='a', area='kofu', out="",
                         ylimit=0, fig_col=2):
    """
    Plot daily total address counts with one subplot per point group, so
    several panels can be laid out side by side.
    points: [{group_name: グループ名, points: [point1, point2, ...]}, ...]
    ylimit != 0 fixes every panel's y range to (0, ylimit);
    out="file" saves the figure as SVG and converts it to EMF via inkscape,
    otherwise the figure is shown interactively.
    """
    resp = requests.get(json_root + area + "/"
                        + "getHourlyCounts.php"
                        + "?sdate=" + sday
                        + "&edate=" + eday
                        + "&addr=" + addr)
    json_dict = resp.json()
    plot_data = {}
    pointAttrib = getPointAttribute(area)
    # fig_col = 2
    fig_row = int(len(points) / fig_col + 1)
    fig = plt.figure(figsize=(8*fig_col, 5*fig_row))
    # set the subplot margins
    plt.subplots_adjust(wspace=0.6, hspace=0.3, right=0.85)
    fig_num = 0
    for group in points:
        plot_data = {}
        for pt in group['points']:
            if not pt in json_dict:
                continue
            plot_data[pointAttrib[pt]['短縮名']] = {}
            for day in json_dict[pt]:
                day_by_datetime = datetime.strptime(day, "%Y%m%d")
                plot_data[pointAttrib[pt]['短縮名']][day_by_datetime] = 0
                # sum the 24 hourly counts into a daily total
                for h in json_dict[pt][day]['hourly']:
                    plot_data[pointAttrib[pt]['短縮名']
                              ][day_by_datetime] += int(h)
        if len(plot_data) == 0:
            continue   # no data at all for this group: skip the panel
        df4plot = pd.DataFrame(plot_data)
        # draw this group's panel
        fig_num += 1
        sub_fig = fig.add_subplot(fig_row, fig_col, fig_num)
        ax = df4plot.plot(ax=sub_fig, fontsize=12, lw=2)
        # pin the legend outside the axes
        ax.legend(fontsize=12, loc='center left',
                  bbox_to_anchor=(1.05, 0.5), borderaxespad=0)
        ax.set_title(group['group_name'] + " (" + sday + "〜" + eday + ")",
                     fontsize=12)
        # ax.set_xlabel("日", fontsize=12)
        ax.set_ylabel("アドレス数", fontsize=12)
        if ylimit != 0:
            ax.set_ylim(0, int(ylimit))
    if out == "file":
        outfile_body = area + sday
        plt.savefig(outfile_body + ".svg", bbox_inches="tight")
        import subprocess
        subprocess.run("inkscape --file " + outfile_body + ".svg"
                       + " --export-emf " + outfile_body + ".emf", shell=True)
    else:
        plt.show()
def plotMonthlyData4point(point, point_name,
                          smonth, emonth, addr='a', area="kofu", out="",
                          doubleFig=False):
    '''
    Aggregate one point's 24-hour count profile by month and plot the
    per-month average curves (hour of day on the x axis).
    doubleFig=True automatically splits the months over two panels,
    six months per panel.
    point          : sensor point id used in the CSV file paths
    point_name     : human-readable name used for titles / output file name
    smonth, emonth : "YYYYMM" strings, inclusive range
    out="file" saves the figure as SVG and converts it to EMF via inkscape.
    '''
    baseDir = ("/home/raspimngr/csv/" + area + "/points/"
               + point + "/hourly_count/" + "hourly_")
    sday = "01"
    # last day of each month (February fixed at 28 — leap years not handled)
    eday = {"01": "31", "02": "28", "03": "31", "04": "30", "05": "31", "06": "30",
            "07": "31", "08": "31", "09": "30", "10": "31", "11": "30", "12": "31"}
    dev_str = ""
    if addr == "a":
        dev_str = "_all"   # addr 'a' selects the *_all_* CSV files
    plot_data = {}
    month = smonth
    dt_month = datetime.strptime(smonth+"01", "%Y%m%d")
    while month <= emonth:
        # NOTE(review): sdate/edate are computed but never used afterwards
        sdate = month + sday
        edate = month + eday[month[-2:]]
        plot_data[month] = np.zeros((3, 24)) # global, global, total (only row 0 is filled below)
        n_obs_days = 0   # number of days with an existing data file
        for i in range(1, int(eday[month[-2:]])+1):
            filename = (baseDir + month
                        + '{:02d}'.format(i) + dev_str + "_" + point + ".csv")
            if not os.path.exists(filename):
                continue
            with open(filename, 'r') as f:
                n_obs_days += 1.0
                reader = csv.reader(f)
                # each CSV row: hour, count[, ...]; accumulate per hour
                for line in reader:
                    plot_data[month][0][int(line[0])] += int(line[1])
                    # plot_data[month][1][int(line[0])] += int(line[2]) # recent data records local and global separately, but data before 2019-04 does not
                    # plot_data[month][2][int(line[0])] += int(line[1]) + int(line[2])
        if n_obs_days == 0:
            plot_data.pop(month)   # drop months with no data at all
        else:
            plot_data[month] = plot_data[month] / n_obs_days  # average per day
        dt_month = dt_month + relativedelta(months=1)
        month = datetime.strftime(dt_month, "%Y%m")
    # plot
    if len(plot_data) == 0:
        return
    if doubleFig:
        num_fig = int((len(plot_data)-1) / 6) + 1
    else:
        num_fig = 1
    if num_fig > 1:
        fig = plt.figure(figsize=(14, 5))
    else:
        fig = plt.figure(figsize=(6.3, 5))
        num_fig = 1
    ax1 = fig.add_subplot(1, num_fig, 1)
    if num_fig > 1:
        ax2 = fig.add_subplot(1, num_fig, 2)
    num = 0
    # first six months go on ax1, the rest on ax2 (when doubleFig)
    for m, d in plot_data.items():
        if num < 6 or doubleFig == False:
            ax1.plot(range(24), d[0], label=m, lw=2)
        else:
            ax2.plot(range(24), d[0], label=m, lw=2)
        num += 1
    ax1.legend()
    ax1.set_title(point_name, fontsize=12)
    ax1.set_xlabel("時刻", fontsize=12)
    ax1.set_ylabel("アドレス数/日", fontsize=12)
    if num_fig > 1:
        ax2.legend()
        ax2.set_title(point_name,
                      fontsize=12)
        ax2.set_xlabel("時刻", fontsize=12)
        ax2.set_ylabel("アドレス数/日", fontsize=12)
    if out == "file":
        outfile_body = point_name + "_hourly"
        plt.savefig(outfile_body + ".svg", bbox_inches="tight")
        import subprocess
        subprocess.run("inkscape --file " + outfile_body + ".svg"
                       + " --export-emf " + outfile_body + ".emf", shell=True)
    else:
        plt.show()
def getPointAttribute(area):
    """
    Load the sensor-point attribute table for *area* and return it as a
    dict keyed by sensor name (the 'センサ名' column), with one attribute
    dict per sensor.  Raises KeyError for an unknown area.
    """
    area_files = {"kofu": "/var/www/html/kofu/kofu_position.csv",
                  "ff": "/var/www/html/ff/sensor_points.csv",
                  "hakushu": "/home/toyoki/public_html/hakushu/points_hakushu.csv",
                  "ttri": "/home/toyotamngr/csv/toyota/sensor_points.csv"}
    attribs = pd.read_csv(area_files[area])
    return attribs.set_index('センサ名').T.to_dict()