pandas时间序列及绘图

1. 由字符串格式生成时间数据

1
2
3
4
5
6
7
8
9
import pandas as pd
import numpy as np

# Parse a date-time string into a pandas Timestamp.
# pd.datetime was deprecated in pandas 1.0 and removed in 2.0, so the string
# is parsed with pd.to_datetime and an explicit format instead.
dt_start = '2018-07-04 12:00'
pd_dt_start = pd.to_datetime(dt_start, format='%Y-%m-%d %H:%M')

print(dt_start)     # 2018-07-04 12:00
print(pd_dt_start)  # 2018-07-04 12:00:00

2. 生成时间序列

1
2
3
4
5
6
7
8
9
10
# Build an hourly range of timestamps; both endpoints are included, so one
# full day yields 25 values.
# The lowercase 'h' alias is used because uppercase 'H' is deprecated since
# pandas 2.2.
dt_list = pd.date_range('2018-07-04 09:00', '2018-07-05 09:00', freq='h')
print(dt_list)

# Build the DataFrame
df = pd.DataFrame(dt_list, columns=['Time'])  # the timestamps become the 'Time' column
df['Temp'] = np.random.normal(30, 5, size=len(dt_list))  # add a random temperature column, 'Temp'
print(df)

# Save as a CSV file. index=False keeps the row index out of the file, so
# reading it back yields only the 'Time' and 'Temp' columns.
df.to_csv('data.csv', index=False)

3. 读取文件中的时间

1
2
3
4
5
6
7
# Read the timestamps back from the CSV file and turn them into pandas datetimes.
df = pd.read_csv('data.csv')  # load the CSV file
raw_time = df['Time']
# Inspect the dtype: it should be datetime64[ns]; if it is object, it has to be converted.
print(raw_time.head())

# Convert the column with pd.to_datetime and show the result.
df['Time'] = pd.to_datetime(raw_time)
print(df['Time'].head())

4. 数据筛选

1
2
3
4
5
# Keep only the two columns of interest and drop rows with missing values.
df = df[['Time', 'Temp']].dropna()

# Select rows at or after the start time whose temperature is at most 35.0.
in_window = df['Time'] >= pd_dt_start  # time filter
not_too_hot = df['Temp'] <= 35.0       # temperature filter
df = df[in_window & not_too_hot]

print(df['Time'].head())

5. 绘图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.dates as dates

# Plot temperature against time on a single 12x8 figure.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(df['Time'], df['Temp'])
ax.set_xlabel('Time')
ax.set_ylabel('Temp (C)')

# Custom ticks
x_axis = ax.xaxis
# Major ticks: one per day, labelled with a literal "00" above the date.
x_axis.set_major_locator(dates.DayLocator(interval=1))
x_axis.set_major_formatter(dates.DateFormatter('00\n%Y-%m-%d'))
# Minor ticks: one per hour, labelled with the two-digit hour.
x_axis.set_minor_locator(dates.HourLocator(interval=1))
x_axis.set_minor_formatter(dates.DateFormatter('%H'))

# Y axis: a major tick at every multiple of 10.
ax.yaxis.set_major_locator(ticker.MultipleLocator(10))

plt.show()