#Pandas库
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd #引入pandas库,主要使用其中的Series类型和DataFrame类型
# Core ideas behind the two pandas containers:
#   Series    = index + one-dimensional data
#   DataFrame = row/column index + two-dimensional data
#   1. Operating on the index is operating on the data.
#   2. Topics: reindexing, deleting data, arithmetic and comparison operations.
#   3. Series and DataFrame objects can be handled like single values.
d = pd.Series(range(20))
ds = d.cumsum()  # running total: each entry is the sum of all items up to it

# The two data types are Series and DataFrame.
# Data structure of a Series: index -> data.
a = pd.Series(data=[9, 8, 7, 6], index=list("abcd"))  # index given by keyword
a1 = pd.Series([9, 8, 7, 6], list("abcd"))  # index given as 2nd positional arg

# A Series can also be built from a scalar value or a Python dict.
s = pd.Series(25, index=list("abcd"))  # the scalar 25 is repeated for every label
s1 = pd.Series(dict(a=9, b=8, c=7, d=6))  # dict keys become the index
# Dict plus an explicit index: values are picked by label in the given order;
# 'd' is not a key of the dict, so its entry is NaN.
s2 = pd.Series(dict(a=9, b=8, c=7), index=list("cadb"))

# Creating a Series from an ndarray.
nd1 = pd.Series(np.arange(5))
nd2 = pd.Series(np.arange(5), index=np.arange(9, 4, -1))  # labels 9, 8, 7, 6, 5
# A single print call with a newline separator emits the same three lines:
# the index, the underlying values, and the value stored under label 8.
print(nd2.index, nd2.values, nd2.loc[8], sep="\n")
# Basic Series operations on a small labelled example.
b1 = pd.Series([9, 8, 7, 6], list("abcd"))
# Boolean-mask selection: keep only the entries greater than the median.
b1median = b1.loc[b1.gt(b1.median())]
# One print call with a newline separator produces the same output as
# printing each item with its own call.
print(
    b1,  # the whole Series
    b1.iloc[:3],  # slice: the first three elements
    b1median,  # entries above the median
    np.exp(b1),  # NumPy functions apply element-wise to a Series
    "c" in b1,  # membership tests the index: is 'c' a label of b1?
    b1.loc["b"],  # value stored under label 'b'
    0 in b1,  # 0 is not one of the labels
    b1.get("f", 100),  # value at 'f', or the default 100; b1 is not modified
    sep="\n",
)
b2 = pd.Series([0, 1, 2, 0], list("cdef"))
# Addition aligns on the union of both indexes: labels present in both
# operands are summed, every other label becomes NaN.
sumb1b2 = b1.add(b2)
# DataFrame: the tabular (two-dimensional) data type.
# Ways to create one, starting with a two-dimensional ndarray.

# DataFrame from a 2-D array: 2 rows x 5 columns holding 0..9.
d = pd.DataFrame(np.arange(10).reshape((2, 5)))
print(d)

# DataFrame from a dict of one-dimensional Series: the dict keys become the
# column names and the rows are the union of the Series labels, so column
# 'one' has no value (NaN) in row 'd'.
dt = dict(
    one=pd.Series([1, 2, 3], index=list("abc")),
    two=pd.Series([4, 5, 6, 7], index=list("abcd")),
)
d1 = pd.DataFrame(data=dt)
print(d1)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd #引入pandas库,主要使用其中的Series类型和DataFrame类型
# NOTE(review): this section repeats the Series-creation walkthrough that
# appears earlier in this file, so its output is printed a second time.
#
# Core ideas behind the two pandas containers:
#   Series    = index + one-dimensional data
#   DataFrame = row/column index + two-dimensional data
#   1. Operating on the index is operating on the data.
#   2. Topics: reindexing, deleting data, arithmetic and comparison operations.
#   3. Series and DataFrame objects can be handled like single values.
d = pd.Series(range(20))
ds = d.cumsum()  # running total: each entry is the sum of all items up to it

# The two data types are Series and DataFrame.
# Data structure of a Series: index -> data.
a = pd.Series(data=[9, 8, 7, 6], index=list("abcd"))  # index given by keyword
a1 = pd.Series([9, 8, 7, 6], list("abcd"))  # index given as 2nd positional arg

# A Series can also be built from a scalar value or a Python dict.
s = pd.Series(25, index=list("abcd"))  # the scalar 25 is repeated for every label
s1 = pd.Series(dict(a=9, b=8, c=7, d=6))  # dict keys become the index
# Dict plus an explicit index: values are picked by label in the given order;
# 'd' is not a key of the dict, so its entry is NaN.
s2 = pd.Series(dict(a=9, b=8, c=7), index=list("cadb"))

# Creating a Series from an ndarray.
nd1 = pd.Series(np.arange(5))
nd2 = pd.Series(np.arange(5), index=np.arange(9, 4, -1))  # labels 9, 8, 7, 6, 5
# A single print call with a newline separator emits the same three lines:
# the index, the underlying values, and the value stored under label 8.
print(nd2.index, nd2.values, nd2.loc[8], sep="\n")
# NOTE(review): this section repeats the Series-operations walkthrough that
# appears earlier in this file, so its output is printed a second time.
b1 = pd.Series([9, 8, 7, 6], list("abcd"))
# Boolean-mask selection: keep only the entries greater than the median.
b1median = b1.loc[b1.gt(b1.median())]
# One print call with a newline separator produces the same output as
# printing each item with its own call.
print(
    b1,  # the whole Series
    b1.iloc[:3],  # slice: the first three elements
    b1median,  # entries above the median
    np.exp(b1),  # NumPy functions apply element-wise to a Series
    "c" in b1,  # membership tests the index: is 'c' a label of b1?
    b1.loc["b"],  # value stored under label 'b'
    0 in b1,  # 0 is not one of the labels
    b1.get("f", 100),  # value at 'f', or the default 100; b1 is not modified
    sep="\n",
)
b2 = pd.Series([0, 1, 2, 0], list("cdef"))
# Addition aligns on the union of both indexes: labels present in both
# operands are summed, every other label becomes NaN.
sumb1b2 = b1.add(b2)
# NOTE(review): this section repeats the DataFrame-creation walkthrough that
# appears earlier in this file, so its output is printed a second time.
#
# DataFrame: the tabular (two-dimensional) data type.
# Ways to create one, starting with a two-dimensional ndarray.

# DataFrame from a 2-D array: 2 rows x 5 columns holding 0..9.
d = pd.DataFrame(np.arange(10).reshape((2, 5)))
print(d)

# DataFrame from a dict of one-dimensional Series: the dict keys become the
# column names and the rows are the union of the Series labels, so column
# 'one' has no value (NaN) in row 'd'.
dt = dict(
    one=pd.Series([1, 2, 3], index=list("abc")),
    two=pd.Series([4, 5, 6, 7], index=list("abcd")),
)
d1 = pd.DataFrame(data=dt)
print(d1)