登录
首页 > 精彩阅读 > 数据挖掘:pandas时间模块管理
数据挖掘:pandas时间模块管理
2020-06-10
收藏

pandas datetime

# datetime.timedelta  时间差 

t1 = datetime.datetime(2017,10,1)
print(t1)
print("")

tx = datetime.timedelta(100)  # timedelta(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0)
print(tx)
print("")

t2 = t1 + tx
print(t2)
2017-10-01 00:00:00

100 days, 0:00:00

2018-01-09 00:00:00
2019-05-26 

2019-05-26 
# datetime.timedelta  时间差 

t1 = datetime.datetime(2017,10,1)
print(t1)
print("")

tx = datetime.timedelta(100)  # timedelta(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0)
print(tx)
print("")

t2 = t1 + tx
print(t2)
2017-10-01 00:00:00

100 days, 0:00:00

2018-01-09 00:00:00
# datetime.timedelta  时间差 

t1 = datetime.datetime(2017,10,1)
print(t1)
print("")

tx = datetime.timedelta(100)  # timedelta(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0)
print(tx)
print("")

t2 = t1 + tx
print(t2)
2017-10-01 00:00:00

100 days, 0:00:00

2018-01-09 00:00:00

pd.Timestamp 跟 datetime 作用相同,只不过 Timestamp 是 pandas(pd)模块里的

time_list1 = ["20171019", "20181020", "bbbb", "20191021"]

t1= pd.to_datetime(time_list1, errors="ignore")
print(t1, type(t1))
print("")

t2 = pd.to_datetime(time_list1, errors="coerce")
print(t2)
Index(['20171019', '20181020', 'bbbb', '20191021'], dtype='object') <class 'pandas.core.indexes.base.Index'>

DatetimeIndex(['2017-10-19', '2018-10-20', 'NaT', '2019-10-21'], dtype='datetime64[ns]', freq=None)
2020-06-01 14:28:08.656056

2020-06-01 14:28:08.656056

2017-10-21 00:00:00

pd.to_datetime 多个时间数据转换成时间戳索引

rng = pd.DatetimeIndex(["20160910", "11/06/2017", "20180821", "26/05/2019"])
print(rng)
print(type(rng))
print("")

print(rng[0], type(rng[0]))
DatetimeIndex(['2016-09-10', '2017-11-06', '2018-08-21', '2019-05-26'], dtype='datetime64[ns]', freq=None)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>

2016-09-10 00:00:00 <class 'pandas._libs.tslibs.timestamps.Timestamp'>

如果时间序列里包含非时间的数据:errors="ignore" 会忽略异常(但不会转换成时间戳索引),errors="coerce" 会把异常值改为 NaT

st = pd.Series(np.random.rand(4), index=rng)  # 把时间戳索引当成index
print(st)
2016-09-10    0.835586
2017-11-06    0.223044
2018-08-21    0.950717
2019-05-26    0.013370
dtype: float64

pd.DatetimeIndex() 直接生成时间戳序列

st = pd.Series(np.random.rand(4), index=rng)  # 把时间戳索引当成index
print(st)
2016-09-10    0.835586
2017-11-06    0.223044
2018-08-21    0.950717
2019-05-26    0.013370
dtype: float64
st = pd.Series(np.random.rand(4), index=rng)  # 把时间戳索引当成index
print(st)
2016-09-10    0.835586
2017-11-06    0.223044
2018-08-21    0.950717
2019-05-26    0.013370
dtype: float64

pd.date_range() 生成日期范围

t_index2 = pd.date_range(start="20181018", periods=10, name="t_index2")
print(t_index2)
DatetimeIndex(['2018-10-18', '2018-10-19', '2018-10-20', '2018-10-21',
               '2018-10-22', '2018-10-23', '2018-10-24', '2018-10-25',
               '2018-10-26', '2018-10-27'],
              dtype='datetime64[ns]', name='t_index2', freq='D')

start end

t_index3 = pd.date_range(end="20181018", periods=10, name="t_index3")
print(t_index3)
DatetimeIndex(['2018-10-09', '2018-10-10', '2018-10-11', '2018-10-12',
               '2018-10-13', '2018-10-14', '2018-10-15', '2018-10-16',
               '2018-10-17', '2018-10-18'],
              dtype='datetime64[ns]', name='t_index3', freq='D')

periods

t_index3 = pd.date_range(end="20181018", periods=10, name="t_index3")
print(t_index3)
DatetimeIndex(['2018-10-09', '2018-10-10', '2018-10-11', '2018-10-12',
               '2018-10-13', '2018-10-14', '2018-10-15', '2018-10-16',
               '2018-10-17', '2018-10-18'],
              dtype='datetime64[ns]', name='t_index3', freq='D')
t_index3 = pd.date_range(end="20181018", periods=10, name="t_index3")
print(t_index3)
DatetimeIndex(['2018-10-09', '2018-10-10', '2018-10-11', '2018-10-12',
               '2018-10-13', '2018-10-14', '2018-10-15', '2018-10-16',
               '2018-10-17', '2018-10-18'],
              dtype='datetime64[ns]', name='t_index3', freq='D')

name normalize

t_index6 = pd.bdate_range(start="20191001", end="20191007", name="t_index6")
print(t_index6)
DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-07'],
              dtype='datetime64[ns]', name='t_index6', freq='B')
DatetimeIndex(['2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',
               '2019-09-14', '2019-09-15', '2019-09-16', '2019-09-17'],
              dtype='datetime64[ns]', name='t_index5', freq='D')

closed

t_index7_list= pd.date_range(start="20191001", end="20191007", name="t_index7_list")
print(t_index7_list)
print("\n")

t_index7_list= list(pd.date_range(start="20191001", end="20191007", name="t_index7_list"))
print(t_index7_list)
DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-05', '2019-10-06', '2019-10-07'],
              dtype='datetime64[ns]', name='t_index7_list', freq='D')
DatetimeIndex(['2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',
               '2019-09-14', '2019-09-15', '2019-09-16', '2019-09-17'],
              dtype='datetime64[ns]', name='t_index5', freq='D')
DatetimeIndex(['2019-09-11', '2019-09-12', '2019-09-13', '2019-09-14',
               '2019-09-15', '2019-09-16', '2019-09-17', '2019-09-18'],
              dtype='datetime64[ns]', name='t_index5', freq='D')

pd.bdate_range() 默认频率为工作日

# 默认freq = 'D' 每日

pd.date_range("10/1/2019", "2019/10/7")  
DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-05', '2019-10-06', '2019-10-07'],
              dtype='datetime64[ns]', freq='D')

pd.date_range 转换成list 元素为时间戳Timestamp

# 'B' 每工作日

pd.date_range("10/01/2019", "10/07/2019", freq = "B")  
DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
               '2019-10-07'],
              dtype='datetime64[ns]', freq='B')
[Timestamp('2019-10-01 00:00:00', freq='D'), Timestamp('2019-10-02 00:00:00', freq='D'), Timestamp('2019-10-03 00:00:00', freq='D'), Timestamp('2019-10-04 00:00:00', freq='D'), Timestamp('2019-10-05 00:00:00', freq='D'), Timestamp('2019-10-06 00:00:00', freq='D'), Timestamp('2019-10-07 00:00:00', freq='D')]

freq 日期偏移量

# H 每小时

pd.date_range("10/01/2019  12:00:00", "10/02/2019 12:00:00", freq = "H")  
DatetimeIndex(['2019-10-01 12:00:00', '2019-10-01 13:00:00',
               '2019-10-01 14:00:00', '2019-10-01 15:00:00',
               '2019-10-01 16:00:00', '2019-10-01 17:00:00',
               '2019-10-01 18:00:00', '2019-10-01 19:00:00',
               '2019-10-01 20:00:00', '2019-10-01 21:00:00',
               '2019-10-01 22:00:00', '2019-10-01 23:00:00',
               '2019-10-02 00:00:00', '2019-10-02 01:00:00',
               '2019-10-02 02:00:00', '2019-10-02 03:00:00',
               '2019-10-02 04:00:00', '2019-10-02 05:00:00',
               '2019-10-02 06:00:00', '2019-10-02 07:00:00',
               '2019-10-02 08:00:00', '2019-10-02 09:00:00',
               '2019-10-02 10:00:00', '2019-10-02 11:00:00',
               '2019-10-02 12:00:00'],
              dtype='datetime64[ns]', freq='H')
 # T/MIN 每分

pd.date_range("10/01/2019 12:10:00" , "10/01/2019 12:30:00", freq = "T") 
DatetimeIndex(['2019-10-01 12:10:00', '2019-10-01 12:11:00',
               '2019-10-01 12:12:00', '2019-10-01 12:13:00',
               '2019-10-01 12:14:00', '2019-10-01 12:15:00',
               '2019-10-01 12:16:00', '2019-10-01 12:17:00',
               '2019-10-01 12:18:00', '2019-10-01 12:19:00',
               '2019-10-01 12:20:00', '2019-10-01 12:21:00',
               '2019-10-01 12:22:00', '2019-10-01 12:23:00',
               '2019-10-01 12:24:00', '2019-10-01 12:25:00',
               '2019-10-01 12:26:00', '2019-10-01 12:27:00',
               '2019-10-01 12:28:00', '2019-10-01 12:29:00',
               '2019-10-01 12:30:00'],
              dtype='datetime64[ns]', freq='T')
# S 每秒

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq = "S")  
DatetimeIndex(['2019-10-01 00:00:00', '2019-10-01 00:00:01',
               '2019-10-01 00:00:02', '2019-10-01 00:00:03',
               '2019-10-01 00:00:04', '2019-10-01 00:00:05',
               '2019-10-01 00:00:06', '2019-10-01 00:00:07',
               '2019-10-01 00:00:08', '2019-10-01 00:00:09',
               '2019-10-01 00:00:10', '2019-10-01 00:00:11',
               '2019-10-01 00:00:12', '2019-10-01 00:00:13',
               '2019-10-01 00:00:14', '2019-10-01 00:00:15',
               '2019-10-01 00:00:16', '2019-10-01 00:00:17',
               '2019-10-01 00:00:18', '2019-10-01 00:00:19',
               '2019-10-01 00:00:20', '2019-10-01 00:00:21',
               '2019-10-01 00:00:22', '2019-10-01 00:00:23',
               '2019-10-01 00:00:24', '2019-10-01 00:00:25',
               '2019-10-01 00:00:26', '2019-10-01 00:00:27',
               '2019-10-01 00:00:28', '2019-10-01 00:00:29',
               '2019-10-01 00:00:30'],
              dtype='datetime64[ns]', freq='S')
# L 每毫秒 (千分之一秒)

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq = "L")  
DatetimeIndex([       '2019-10-01 00:00:00', '2019-10-01 00:00:00.001000',
               '2019-10-01 00:00:00.002000', '2019-10-01 00:00:00.003000',
               '2019-10-01 00:00:00.004000', '2019-10-01 00:00:00.005000',
               '2019-10-01 00:00:00.006000', '2019-10-01 00:00:00.007000',
               '2019-10-01 00:00:00.008000', '2019-10-01 00:00:00.009000',
               ...
               '2019-10-01 00:00:29.991000', '2019-10-01 00:00:29.992000',
               '2019-10-01 00:00:29.993000', '2019-10-01 00:00:29.994000',
               '2019-10-01 00:00:29.995000', '2019-10-01 00:00:29.996000',
               '2019-10-01 00:00:29.997000', '2019-10-01 00:00:29.998000',
               '2019-10-01 00:00:29.999000',        '2019-10-01 00:00:30'],
              dtype='datetime64[ns]', length=30001, freq='L')
# U 每微秒 (百万分之一秒)

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq = "U") # U 每微秒 (百万分之一秒)
DatetimeIndex([       '2019-10-01 00:00:00', '2019-10-01 00:00:00.000001',
               '2019-10-01 00:00:00.000002', '2019-10-01 00:00:00.000003',
               '2019-10-01 00:00:00.000004', '2019-10-01 00:00:00.000005',
               '2019-10-01 00:00:00.000006', '2019-10-01 00:00:00.000007',
               '2019-10-01 00:00:00.000008', '2019-10-01 00:00:00.000009',
               ...
               '2019-10-01 00:00:29.999991', '2019-10-01 00:00:29.999992',
               '2019-10-01 00:00:29.999993', '2019-10-01 00:00:29.999994',
               '2019-10-01 00:00:29.999995', '2019-10-01 00:00:29.999996',
               '2019-10-01 00:00:29.999997', '2019-10-01 00:00:29.999998',
               '2019-10-01 00:00:29.999999',        '2019-10-01 00:00:30'],
              dtype='datetime64[ns]', length=30000001, freq='U')
# U 每微秒 (百万分之一秒)

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq = "U") # U 每微秒 (百万分之一秒)
DatetimeIndex([       '2019-10-01 00:00:00', '2019-10-01 00:00:00.000001',
               '2019-10-01 00:00:00.000002', '2019-10-01 00:00:00.000003',
               '2019-10-01 00:00:00.000004', '2019-10-01 00:00:00.000005',
               '2019-10-01 00:00:00.000006', '2019-10-01 00:00:00.000007',
               '2019-10-01 00:00:00.000008', '2019-10-01 00:00:00.000009',
               ...
               '2019-10-01 00:00:29.999991', '2019-10-01 00:00:29.999992',
               '2019-10-01 00:00:29.999993', '2019-10-01 00:00:29.999994',
               '2019-10-01 00:00:29.999995', '2019-10-01 00:00:29.999996',
               '2019-10-01 00:00:29.999997', '2019-10-01 00:00:29.999998',
               '2019-10-01 00:00:29.999999',        '2019-10-01 00:00:30'],
              dtype='datetime64[ns]', length=30000001, freq='U')
# U 每微秒 (百万分之一秒)

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq = "U") # U 每微秒 (百万分之一秒)
DatetimeIndex([       '2019-10-01 00:00:00', '2019-10-01 00:00:00.000001',
               '2019-10-01 00:00:00.000002', '2019-10-01 00:00:00.000003',
               '2019-10-01 00:00:00.000004', '2019-10-01 00:00:00.000005',
               '2019-10-01 00:00:00.000006', '2019-10-01 00:00:00.000007',
               '2019-10-01 00:00:00.000008', '2019-10-01 00:00:00.000009',
               ...
               '2019-10-01 00:00:29.999991', '2019-10-01 00:00:29.999992',
               '2019-10-01 00:00:29.999993', '2019-10-01 00:00:29.999994',
               '2019-10-01 00:00:29.999995', '2019-10-01 00:00:29.999996',
               '2019-10-01 00:00:29.999997', '2019-10-01 00:00:29.999998',
               '2019-10-01 00:00:29.999999',        '2019-10-01 00:00:30'],
              dtype='datetime64[ns]', length=30000001, freq='U')

星期几缩写 -- MON/TUE/WED/THU/FRI/SAT/SUN

# M -- 每月最后一个日历日

pd.date_range("2019", "2020", freq = "M") 
DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
               '2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31'],
              dtype='datetime64[ns]', freq='M')
# M -- 每月最后一个日历日

pd.date_range("2019", "2020", freq = "M") 
DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
               '2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31'],
              dtype='datetime64[ns]', freq='M')
# M -- 每月最后一个日历日

pd.date_range("2019", "2020", freq = "M") 
DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
               '2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31'],
              dtype='datetime64[ns]', freq='M')

月份

# BM - 每月最后一个工作日

print(pd.date_range("2019", "2020", freq="BM"))
DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-29', '2019-04-30',
               '2019-05-31', '2019-06-28', '2019-07-31', '2019-08-30',
               '2019-09-30', '2019-10-31', '2019-11-29', '2019-12-31'],
              dtype='datetime64[ns]', freq='BM')
DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31'], dtype='datetime64[ns]', freq='Q-JAN')
DatetimeIndex(['2019-02-28', '2019-05-31', '2019-08-31', '2019-11-30'], dtype='datetime64[ns]', freq='Q-FEB')
DatetimeIndex(['2019-03-31', '2019-06-30', '2019-09-30', '2019-12-31'], dtype='datetime64[ns]', freq='Q-MAR')

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31'], dtype='datetime64[ns]', freq='Q-APR')
# BQ - 每个季度末最后一月的最后一个工作日

print(pd.date_range("2019", "2021", freq="BQ-JAN"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-FEB"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-MAR"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-APR"))
DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31',
               '2020-01-31', '2020-04-30', '2020-07-31', '2020-10-30'],
              dtype='datetime64[ns]', freq='BQ-JAN')

DatetimeIndex(['2019-02-28', '2019-05-31', '2019-08-30', '2019-11-29',
               '2020-02-28', '2020-05-29', '2020-08-31', '2020-11-30'],
              dtype='datetime64[ns]', freq='BQ-FEB')

DatetimeIndex(['2019-03-29', '2019-06-28', '2019-09-30', '2019-12-31',
               '2020-03-31', '2020-06-30', '2020-09-30', '2020-12-31'],
              dtype='datetime64[ns]', freq='BQ-MAR')

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31',
               '2020-01-31', '2020-04-30', '2020-07-31', '2020-10-30'],
              dtype='datetime64[ns]', freq='BQ-APR')
# BA -- 每年指定月份的最后一个工作日

print(pd.date_range("2019", "2021", freq="BA-JAN"))
print(pd.date_range("2019", "2023", freq="BA-FEB"))
print(pd.date_range("2019", "2021", freq="BA-MAR"))
DatetimeIndex(['2019-01-31', '2020-01-31'], dtype='datetime64[ns]', freq='BA-JAN')
DatetimeIndex(['2019-02-28', '2020-02-28', '2021-02-26', '2022-02-28'], dtype='datetime64[ns]', freq='BA-FEB')
DatetimeIndex(['2019-03-29', '2020-03-31'], dtype='datetime64[ns]', freq='BA-MAR')
# MS -- 每月第一个日历日

pd.date_range("2019", "2020", freq="MS")
DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
               '2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01',
               '2019-09-01', '2019-10-01', '2019-11-01', '2019-12-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='MS')
# QS - 每个季度末最后一月的第一个日历日

print(pd.date_range("2019", "2020", freq="QS-JAN"))
print("")
print(pd.date_range("2019", "2020", freq="QS-FEB"))
print("")
print(pd.date_range("2019", "2020", freq="QS-MAR"))
print("")
print(pd.date_range("2019", "2020", freq="QS-APR"))
DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='QS-JAN')

DatetimeIndex(['2019-02-01', '2019-05-01', '2019-08-01', '2019-11-01'], dtype='datetime64[ns]', freq='QS-FEB')

DatetimeIndex(['2019-03-01', '2019-06-01', '2019-09-01', '2019-12-01'], dtype='datetime64[ns]', freq='QS-MAR')

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='QS-APR')
# AS -- 每年指定月份的第一个日历日

print(pd.date_range("2019", "2021", freq="AS-JAN"))
print(pd.date_range("2019", "2021", freq="AS-FEB"))
print(pd.date_range("2019", "2021", freq="AS-DEC"))
DatetimeIndex(['2019-01-01', '2020-01-01', '2021-01-01'], dtype='datetime64[ns]', freq='AS-JAN')
DatetimeIndex(['2019-02-01', '2020-02-01'], dtype='datetime64[ns]', freq='AS-FEB')
DatetimeIndex(['2019-12-01', '2020-12-01'], dtype='datetime64[ns]', freq='AS-DEC')
# BMS -- 每月第一个工作日

print(pd.date_range("2019", "2021", freq="BMS"))
DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
               '2019-05-01', '2019-06-03', '2019-07-01', '2019-08-01',
               '2019-09-02', '2019-10-01', '2019-11-01', '2019-12-02',
               '2020-01-01', '2020-02-03', '2020-03-02', '2020-04-01',
               '2020-05-01', '2020-06-01', '2020-07-01', '2020-08-03',
               '2020-09-01', '2020-10-01', '2020-11-02', '2020-12-01',
               '2021-01-01'],
              dtype='datetime64[ns]', freq='BMS')
# BQS - 每个季度末最后一月的第一个工作日

print(pd.date_range("2019", "2020", freq="BQS-JAN"))
print("")
print(pd.date_range("2019", "2020", freq="BQS-FEB"))
print("")
print(pd.date_range("2019", "2020", freq="BQS-MAR"))
print("")
print(pd.date_range("2019", "2020", freq="BQS-APR"))
DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='BQS-JAN')

DatetimeIndex(['2019-02-01', '2019-05-01', '2019-08-01', '2019-11-01'], dtype='datetime64[ns]', freq='BQS-FEB')

DatetimeIndex(['2019-03-01', '2019-06-03', '2019-09-02', '2019-12-02'], dtype='datetime64[ns]', freq='BQS-MAR')

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-01-01'],
              dtype='datetime64[ns]', freq='BQS-APR')
# BAS -- 每年指定月份的第一个工作日

print(pd.date_range("2019", "2021", freq="BAS-JAN"))
print(pd.date_range("2019", "2021", freq="BAS-FEB"))
print(pd.date_range("2019", "2021", freq="BAS-DEC"))
DatetimeIndex(['2019-01-01', '2020-01-01', '2021-01-01'], dtype='datetime64[ns]', freq='BAS-JAN')
DatetimeIndex(['2019-02-01', '2020-02-03'], dtype='datetime64[ns]', freq='BAS-FEB')
DatetimeIndex(['2019-12-02', '2020-12-01'], dtype='datetime64[ns]', freq='BAS-DEC')
# BAS -- 每年指定月份的第一个工作日

print(pd.date_range("2019", "2021", freq="BAS-JAN"))
print(pd.date_range("2019", "2021", freq="BAS-FEB"))
print(pd.date_range("2019", "2021", freq="BAS-DEC"))
DatetimeIndex(['2019-01-01', '2020-01-01', '2021-01-01'], dtype='datetime64[ns]', freq='BAS-JAN')
DatetimeIndex(['2019-02-01', '2020-02-03'], dtype='datetime64[ns]', freq='BAS-FEB')
DatetimeIndex(['2019-12-02', '2020-12-01'], dtype='datetime64[ns]', freq='BAS-DEC')
# BAS -- 每年指定月份的第一个工作日

print(pd.date_range("2019", "2021", freq="BAS-JAN"))
print(pd.date_range("2019", "2021", freq="BAS-FEB"))
print(pd.date_range("2019", "2021", freq="BAS-DEC"))
DatetimeIndex(['2019-01-01', '2020-01-01', '2021-01-01'], dtype='datetime64[ns]', freq='BAS-JAN')
DatetimeIndex(['2019-02-01', '2020-02-03'], dtype='datetime64[ns]', freq='BAS-FEB')
DatetimeIndex(['2019-12-02', '2020-12-01'], dtype='datetime64[ns]', freq='BAS-DEC')

复合频率

# 2M 每间隔2个月的最后一个日历日

pd.date_range("2019", "2021", freq="2M")  
DatetimeIndex(['2019-01-31', '2019-03-31', '2019-05-31', '2019-07-31',
               '2019-09-30', '2019-11-30', '2020-01-31', '2020-03-31',
               '2020-05-31', '2020-07-31', '2020-09-30', '2020-11-30'],
              dtype='datetime64[ns]', freq='2M')
# 2h30min 间隔是2小时30分钟

pd.date_range("2019/10/1 00:00:00", "2019/10/1 12:00:00", freq="2h30min")
DatetimeIndex(['2019-10-01 00:00:00', '2019-10-01 02:30:00',
               '2019-10-01 05:00:00', '2019-10-01 07:30:00',
               '2019-10-01 10:00:00'],
              dtype='datetime64[ns]', freq='150T')
# 2M 每间隔2个月的最后一个日历日

pd.date_range("2019", "2021", freq="2M")  
 DatetimeIndex(['2019-01-31', '2019-03-31', '2019-05-31', '2019-07-31',
               '2019-09-30', '2019-11-30', '2020-01-31', '2020-03-31',
               '2020-05-31', '2020-07-31', '2020-09-30', '2020-11-30'],
              dtype='datetime64[ns]', freq='2M')

asfreq 时间频率转换

ts = pd.Series(np.random.rand(4), index=pd.date_range("2019/1/1", "2019/1/4"))
print(ts)
print("\n")

# 这里是把D改为4H
print(ts.asfreq("4H"))
print("\n")

# method 插值模式 ffill 用之前值填充 bfill 用之后值填充
print(ts.asfreq("4H", method="ffill"))  
print("\n")

print(ts.asfreq("4H", method="bfill"))
2019-01-01    0.610403
2019-01-02    0.416557
2019-01-03    0.821631
2019-01-04    0.699457
Freq: D, dtype: float64
2019-01-01 00:00:00    0.610403
2019-01-01 04:00:00         NaN
2019-01-01 08:00:00         NaN
2019-01-01 12:00:00         NaN
2019-01-01 16:00:00         NaN
2019-01-01 20:00:00         NaN
2019-01-02 00:00:00    0.416557
2019-01-02 04:00:00         NaN
2019-01-02 08:00:00         NaN
2019-01-02 12:00:00         NaN
2019-01-02 16:00:00         NaN
2019-01-02 20:00:00         NaN
2019-01-03 00:00:00    0.821631
2019-01-03 04:00:00         NaN
2019-01-03 08:00:00         NaN
2019-01-03 12:00:00         NaN
2019-01-03 16:00:00         NaN
2019-01-03 20:00:00         NaN
2019-01-04 00:00:00    0.699457
Freq: 4H, dtype: float64
2019-01-01 00:00:00    0.610403
2019-01-01 04:00:00    0.610403
2019-01-01 08:00:00    0.610403
2019-01-01 12:00:00    0.610403
2019-01-01 16:00:00    0.610403
2019-01-01 20:00:00    0.610403
2019-01-02 00:00:00    0.416557
2019-01-02 04:00:00    0.416557
2019-01-02 08:00:00    0.416557
2019-01-02 12:00:00    0.416557
2019-01-02 16:00:00    0.416557
2019-01-02 20:00:00    0.416557
2019-01-03 00:00:00    0.821631
2019-01-03 04:00:00    0.821631
2019-01-03 08:00:00    0.821631
2019-01-03 12:00:00    0.821631
2019-01-03 16:00:00    0.821631
2019-01-03 20:00:00    0.821631
2019-01-04 00:00:00    0.699457
Freq: 4H, dtype: float64
2019-01-01 00:00:00    0.610403
2019-01-01 04:00:00    0.416557
2019-01-01 08:00:00    0.416557
2019-01-01 12:00:00    0.416557
2019-01-01 16:00:00    0.416557
2019-01-01 20:00:00    0.416557
2019-01-02 00:00:00    0.416557
2019-01-02 04:00:00    0.821631
2019-01-02 08:00:00    0.821631
2019-01-02 12:00:00    0.821631
2019-01-02 16:00:00    0.821631
2019-01-02 20:00:00    0.821631
2019-01-03 00:00:00    0.821631
2019-01-03 04:00:00    0.699457
2019-01-03 08:00:00    0.699457
2019-01-03 12:00:00    0.699457
2019-01-03 16:00:00    0.699457
2019-01-03 20:00:00    0.699457
2019-01-04 00:00:00    0.699457
Freq: 4H, dtype: float64

超前/滞后数据:shift(正数) 数值后移(滞后),shift(负数) 数值前移(超前)

ts = pd.Series(np.random.rand(4), index=pd.date_range("2019/1/1", "2019/1/4"))
print(ts)
print("\n")

print(ts.shift(1))
print("\n")

print(ts.shift(-2))
print("\n")

# 计算该时间戳的值与上一个时间戳的值之比(比值减 1 即为变化百分比)

per = ts/ts.shift(1)
print(per)
2019-01-01    0.197884
2019-01-02    0.403093
2019-01-03    0.208341
2019-01-04    0.330873
Freq: D, dtype: float64
2019-01-01         NaN
2019-01-02    0.197884
2019-01-03    0.403093
2019-01-04    0.208341
Freq: D, dtype: float64
2019-01-01    0.208341
2019-01-02    0.330873
2019-01-03         NaN
2019-01-04         NaN
Freq: D, dtype: float64
2019-01-01         NaN
2019-01-02    2.037017
2019-01-03    0.516855
2019-01-04    1.588134
Freq: D, dtype: float64

shift(freq):加上 freq 参数后,是对时间戳进行位移,而不是对数值进行位移

print(ts)
print("\n")

print(ts.shift(2, freq="D"))  # 按天
print("\n")

print(ts.shift(2, freq="T"))  # 按分钟
2019-01-01    0.197884
2019-01-02    0.403093
2019-01-03    0.208341
2019-01-04    0.330873
Freq: D, dtype: float64
2019-01-03    0.197884
2019-01-04    0.403093
2019-01-05    0.208341
2019-01-06    0.330873
Freq: D, dtype: float64
2019-01-01 00:02:00    0.197884
2019-01-02 00:02:00    0.403093
2019-01-03 00:02:00    0.208341
2019-01-04 00:02:00    0.330873
Freq: D, dtype: float64

(1)获取更多优质内容及精彩资讯,可前往:https://www.cda.cn/?seo

(2)了解更多数据领域的优质课程:

数据分析咨询请扫描二维码

客服在线
立即咨询