pandas时间序列及绘图

1. 由字符串格式生成时间数据

代码示例:
import pandas as pd
import numpy as np

# Parse a datetime string into a pandas Timestamp.
# NOTE: the old `pd.datetime` alias was deprecated in pandas 1.0 and removed
# in pandas 2.0, so `pd.to_datetime` with an explicit format is used instead.
# The resulting Timestamp is a datetime subclass and can be compared against
# a datetime64 column directly.
dt_start = '2018-07-04 12:00'
pd_dt_start = pd.to_datetime(dt_start, format='%Y-%m-%d %H:%M')

print(dt_start) # 2018-07-04 12:00
print(pd_dt_start) # 2018-07-04 12:00:00

2. 生成时间序列

代码示例:
# Build an hourly DatetimeIndex from 2018-07-04 09:00 to 2018-07-05 09:00
# (both endpoints included, i.e. 25 timestamps).
# The lowercase 'h' alias is used because the uppercase 'H' alias is
# deprecated since pandas 2.2.
dt_list = pd.date_range('2018-07-04 09:00', '2018-07-05 09:00', freq='h')
print(dt_list)

# Build the DataFrame
df = pd.DataFrame(dt_list, columns=['Time'])  # the timestamps become the 'Time' column
df['Temp'] = np.random.normal(30, 5, size=len(dt_list))  # add a 'Temp' column of random temperatures (mean 30, std 5)
print(df)

df.to_csv('data.csv')  # save as a CSV file (the row index is written too, as an unnamed first column)

3. 读取文件中的时间

代码示例:
# Load the CSV file and turn its 'Time' column into pandas datetimes.
df = pd.read_csv('data.csv')

# Inspect the column first: the dtype we want is datetime64[ns]; if it shows
# up as object, it has to be converted.
time_column = df['Time']
print(time_column.head())

# Convert with pd.to_datetime and inspect the column again.
df['Time'] = pd.to_datetime(time_column)
print(df['Time'].head())

4. 数据筛选

代码示例:
# Keep only the two columns of interest and drop rows with missing values.
df = df.loc[:, ['Time', 'Temp']].dropna()

# Row filters: at or after the start time, and temperature not above 35.0.
is_after_start = df['Time'] >= pd_dt_start
is_within_temp = df['Temp'] <= 35.0
df = df[is_after_start & is_within_temp]

print(df['Time'].head())

5. 绘图

代码示例:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.dates as dates

# Plot temperature against time on a single 12x8 figure.
_fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(df['Time'], df['Temp'])
ax.set_xlabel('Time')
ax.set_ylabel('Temp (C)')

# Custom ticks on the time axis
x_axis = ax.xaxis
x_axis.set_major_locator(dates.DayLocator(interval=1))  # major ticks: one per day
x_axis.set_major_formatter(dates.DateFormatter('00\n%Y-%m-%d'))  # literal '00' on the first line, the date below it
x_axis.set_minor_locator(dates.HourLocator(interval=1))  # minor ticks: one per hour
x_axis.set_minor_formatter(dates.DateFormatter('%H'))  # minor labels show the hour

# Temperature axis: a major tick at every multiple of 10
ax.yaxis.set_major_locator(ticker.MultipleLocator(10))

plt.show()

时间字符串格式

ref: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes

Directive Meaning Example
%a Weekday as locale’s abbreviated name. Sun, Mon, …, Sat (en_US)
%A Weekday as locale’s full name. Sunday, Monday, …, Saturday (en_US)
%w Weekday as a decimal number, where 0 is Sunday and 6 is Saturday. 0, 1, …, 6
%d Day of the month as a zero-padded decimal number. 01, 02, …, 31
%b Month as locale’s abbreviated name. Jan, Feb, …, Dec (en_US);
%B Month as locale’s full name. January, February, …, December (en_US);
%m Month as a zero-padded decimal number. 01, 02, …, 12
%y Year without century as a zero-padded decimal number. 00, 01, …, 99
%Y Year with century as a decimal number. 0001, 0002, …, 2013, 2014, …, 9998, 9999
%H Hour (24-hour clock) as a zero-padded decimal number. 00, 01, …, 23
%I Hour (12-hour clock) as a zero-padded decimal number. 01, 02, …, 12
%p Locale’s equivalent of either AM or PM. AM, PM (en_US);
%M Minute as a zero-padded decimal number. 00, 01, …, 59
%S Second as a zero-padded decimal number. 00, 01, …, 59
%f Microsecond as a decimal number, zero-padded to 6 digits. 000000, 000001, …, 999999
%z UTC offset in the form ±HHMM[SS[.ffffff]] (empty string if the object is naive). (empty), +0000, -0400, +1030, +063415, -030712.345216
%Z Time zone name (empty string if the object is naive). (empty), UTC, GMT
%j Day of the year as a zero-padded decimal number. 001, 002, …, 366
%U Week number of the year (Sunday as the first day of the week) as a zero-padded decimal number. 00, 01, …, 53
%W Week number of the year (Monday as the first day of the week) as a zero-padded decimal number. 00, 01, …, 53
%c Locale’s appropriate date and time representation. Tue Aug 16 21:30:00 1988 (en_US);
%x Locale’s appropriate date representation. 08/16/88 (None);
%X Locale’s appropriate time representation. 21:30:00 (en_US);
%% A literal ‘%’ character. %
代码示例:
# === 1. Get the current time as a string in a chosen format ===
import time
from datetime import datetime

# Variant A: the time module formats the current local time directly.
now = time.strftime('%Y-%m-%d %H:%M:%S')
print(now)

# Variant B: take a datetime object for "now", then format it
# (this format also includes the abbreviated weekday name, %a).
current = datetime.now()
now = current.strftime('%Y-%m-%d %a %H:%M:%S')
print(now)


# === 2. Convert a time string into a datetime object ===
from datetime import datetime

import pandas as pd

# Standard library: strptime returns a datetime.datetime.
dt = datetime.strptime('2023-01-01 12:00:00', '%Y-%m-%d %H:%M:%S')

# pandas: to_datetime on a single string returns a pandas Timestamp.
pd_dt = pd.to_datetime('2023-01-01 12:00', format='%Y-%m-%d %H:%M')

for parsed in (dt, pd_dt):
    print(parsed)