`
shaojiashuai123456
  • 浏览: 257109 次
  • 性别: Icon_minigender_1
  • 来自: 吉林
社区版块
存档分类
最新评论

python pandas

阅读更多
import sys
from pandas import Series,DataFrame
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing

# Configure matplotlib so that Chinese labels and the minus sign render
# correctly in figures.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used to display Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # needed so the minus sign displays correctly
data = pd.read_csv("./train_samples_all.csv")

# Fit a standardizer on the "dis" and "time" columns. The fitted statistics
# are available afterwards as scaler.mean_ and scaler.scale_.
x = np.array(data[["dis", "time"]])
scaler = preprocessing.StandardScaler().fit(x)

# Derived feature: half of num1 plus num2. Column arithmetic is used instead
# of a row-wise apply(); it gives the same values without a Python-level loop
# per row and still yields a Series when the frame is empty.
data["l_num"] = data["num1"] * 0.5 + data["num2"]

# Split the samples by label value.
data_true = data[data.label == 1]
data_false = data[data.label == 0]

# Summary statistics for each subset.
true_des = data_true.describe()
false_des = data_false.describe()

# Summary statistics for the whole data set, dumped as a nested dict.
dd = data.describe()
# Parenthesized single-argument form is valid on both Python 2 and Python 3.
print(dd.to_dict())

 

from scipy import stats, integrate
import seaborn as sns
# Apply seaborn's default theme with color codes enabled, then draw a grid of
# pairwise plots for "dis" and "time", colored by the "label" column.
sns.set(color_codes=True)
sns.pairplot(
    data=data.loc[:, ["label", "dis", "time"]],
    hue="label",
)

 

# Iterate over a DataFrame row by row.
# NOTE(review): `self.ifn` only resolves inside a method of an object that has
# an `ifn` attribute; at module level `self` is undefined. Presumably this
# snippet was lifted out of a class -- confirm and substitute the real path.
df = pd.read_csv(self.ifn)
for index, row in df.iterrows():
    # One pre-formatted argument prints "<index> <row>" identically on
    # Python 2 and Python 3 (the bare `print a, b` statement is Python 2 only).
    print("%s %s" % (index, row))

 

# Overlay the distribution of "dis" for both label subsets on one figure.
plt.figure(figsize=(10, 5))
plt.xticks(np.arange(0, 3000, 200))  # ticks every 200, from 0 up to (not including) 3000
plt.xlim((0, 4000))
# Same histogram/KDE settings for both subsets; only the KDE line width and
# its label differ.
for subset, line_width, curve_label in ((data_true, 4, "true"), (data_false, 3, "false")):
    sns.distplot(
        subset["dis"],
        kde=True,
        bins=50,
        kde_kws={"lw": line_width, "label": curve_label, "alpha": 0.4},
    )

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics