#2017-11-27 library morning:

# numpy 学习使用

#大写为矩阵，小写为向量
import numpy as np

B.min(axis=0) #取出每一列的最小值
B.max(axis=1) #取出每一行的最大值

B.sum()  #全局加和
B.sum(axis=1) #每一行的加和

np.floor(A)  #取底
A is D  #A和D是同一个对象（is 比较的是对象身份，而不是元素值）
A is not D #A和D不是同一个对象

A.dtype #int64
A.shape #(5,5)
np.arange(12).reshape(3,4)

C=np.arange(1,13).reshape(3,4)  #生成1~12的序列，再划分为3行4列
(A == 22 )
(A == A[1])
(A == B)

a=np.arange(12)
b=a
b.shape=3,4

id(A)  #A的编号
E=A   #赋值不产生任何拷贝，E与A是同一对象（同一内存）
E=A.copy() #深拷贝  不同内存块

import numpy.matlib as mb

mb.base  #is a array mb来自的对象
mb.flat[3]  #下标为3的元素，即第四个数（下标从零开始）
mb.T   #转置
mb.prod(0) #各列元素相乘

/****************************************************/

18-3-9晚：
版本信息：
	import numpy as np
	print(np.__version__)
一维数组：
	arr = np.arange(10)
指定类型：
	np.full((3, 3), True, dtype=bool)
	np.ones((3,3), dtype=bool)
筛选项：
	arr[arr % 2 == 1]
赋值：
	arr[arr % 2 == 1] = -1
	>array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])
赋值给新对象：
	out = np.where(arr % 2 == 1, -1, arr)
	#arr不变
整形：
	arr.reshape(2, -1)  
	# Setting to -1 automatically decides the number of cols
	a = np.arange(10).reshape(2,-1)
	b = np.repeat(1, 10).reshape(2,-1)
纵向拼接：
	np.concatenate([a, b], axis=0)
	np.vstack([a, b])
	np.r_[a, b]
横向拼接：
	np.concatenate([a, b], axis=1)
	np.hstack([a, b])
	np.c_[a, b]
扩展：
	a = np.array([1,2,3])
	np.r_[np.repeat(a, 3), np.tile(a, 3)]
查找共同项：
	a = np.array([1,2,3,2,3,4,3,4,5,6])
	b = np.array([7,2,10,2,7,4,9,4,9,8])
	np.intersect1d(a,b)
a中删除b：
	np.setdiff1d(a,b)
两个array中对应位置元素相同的索引：
	np.where(a==b)
array按条件过滤:
	index = np.where((a >= 5) & (a <= 10))
	a[index]
	
	a[(a>=5) & (a<=10)]
将自己写的python函数maxx用于np
	pair_max = np.vectorize(maxx, otypes=[float])
交换前两列：
	arr[:, [1,0,2]]
交换两行：
	arr[[1,0,2], :]
2darray的行颠倒：
	arr[::-1]
2darray的列颠倒：
	arr[:,::-1]
有范围的随机初始化：
	rand_arr = np.random.randint(low=5, high=10, size=(5,3)) + np.random.random((5,3))
	#5-10的随机数，初始化5行3列
	rand_arr = np.random.uniform(5,10, size=(5,3))
设置小数点精度：
	np.set_printoptions(precision=3)
设置随机数种子
	np.random.seed(100)
科学表示法
	rand_arr = np.random.random([3,3])/1e3
限制打印长度：
	np.set_printoptions(threshold=6)
	np.set_printoptions(threshold=sys.maxsize)  #全部打印（需先 import sys；新版numpy不接受 threshold=np.nan）
从文本中导入数据集
	url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
	np.genfromtxt(url, delimiter=',', dtype='object')
	species = np.array([row[4] for row in iris_1d])
一维变二维（4列）
	2dtres[:4]
平均值，中位数，标准差：
	mu, med, sd = np.mean(sepallength), np.median(sepallength), np.std(sepallength)
归一化：
	Smax, Smin = sepallength.max(), sepallength.min()
	S = (sepallength - Smin)/(Smax - Smin)
	
	S = (sepallength - Smin)/sepallength.ptp()
百分数：
	np.percentile(sepallength, q=[5, 95])
随机插入缺失值（在随机位置写入nan）：
	np.random.seed(100)
	iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan
找到缺失值的位置：
	print("Number of missing values: \n", np.isnan(iris_2d[:, 0]).sum())
	print("Position of missing values: \n", np.where(np.isnan(iris_2d[:, 0])))
多条件过滤：
	condition = (iris_2d[:, 2] > 1.5) & (iris_2d[:, 0] < 5.0)
	iris_2d[condition]
	
	np.corrcoef(iris[:, 0], iris[:, 2])[0, 1]
找到关联：
	from scipy.stats import pearsonr
	corr, p_value = pearsonr(iris[:, 0], iris[:, 2])
检测null值：
	np.isnan(iris_2d).any()
用0代替所有的缺失值：
	iris_2d[np.isnan(iris_2d)] = 0
np.unique(species, return_counts=True)
	# 返回去重（并排序）后的取值，以及每个取值出现的次数
数值到文本的映射：
	petal_length_bin = np.digitize(iris[:, 2].astype('float'), [0, 3, 5, 10])
	label_map = {1: 'small', 2: 'medium', 3: 'large', 4: np.nan}
	petal_length_cat = [label_map[x] for x in petal_length_bin]
添加新列：
	ut = np.hstack([iris_2d, volume])
随机采样：
	np.random.seed(100)
	a = np.array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
	species_out = np.random.choice(a, 150, p=[0.5, 0.25, 0.25])
按列排序：
	print(iris[iris[:,0].argsort()][:20])
获得top5的位置：
	np.partition(a, kth=-5)[-5:]   # top5的值（顺序不保证）
	a.argsort()[-5:]               # top5的位置（下标）
随机初始化：
	np.random.randint(1,11,size=(6, 10))
多维变一维：
	array_of_arrays = np.array([arr1, arr2, arr3])
排名（每个元素在排序后的位次）：
	print(a.argsort().argsort())
多维数组中排列项：
	print(a.ravel().argsort().argsort().reshape(a.shape))
计算每一行的最大值：
	np.amax(a, axis=1)
去掉所有的缺失值：
	a[~np.isnan(a)]
两个数组间的欧氏距离：
	dist = np.linalg.norm(a-b)
np.sign() np.diff()??
值1第n次出现的下标：
	np.where(x == 1)[0][n-1]
datetime64转换为datetime
	from datetime import datetime
	dt64.astype(datetime) 
moving_average(Z, n=3).round(2)
np.arange(start, end, step)
arr_2d = np.concatenate(array_of_arrays)


# scipy 使用

#scipy可以与其它标准科学计算程序库进行比较，
比如GSL(GNU C或C++科学计算库)，或者Matlab工具箱
#重复造轮子，会导致代码充满漏洞、未经优化、难以分享和维护

scipy.cluster 	矢量量化 / K-均值
scipy.constants 	物理和数学常数
scipy.fftpack 	傅里叶变换
scipy.integrate 	积分程序
scipy.interpolate 	插值
scipy.io 	数据输入输出
scipy.linalg 	线性代数程序
scipy.ndimage 	n维图像包
scipy.odr 	正交距离回归
scipy.optimize 	优化
scipy.signal 	信号处理
scipy.sparse 	稀疏矩阵
scipy.spatial 	空间数据结构和算法
scipy.special 	任何特殊数学函数
scipy.stats 	统计
scipy.misc 
   Miscellaneous routines:杂项程序


NumPy是一个定义了数值数组和矩阵类型和它们的[基本运算]的语言扩展
SciPy是另一种使用NumPy来做高等数学、信号处理、优化、统计和许多其它[科学任务]的语言扩展。


import numpy
from scipy import packagename  #scipy 包含numpy的所有函数


     cluster                      --- Vector Quantization / Kmeans
     fftpack                      --- Discrete Fourier Transform algorithms
     integrate                    --- Integration routines
     interpolate                  --- Interpolation Tools
     io                           --- Data input and output
     linalg                       --- Linear algebra routines
     linalg.blas                  --- Wrappers to BLAS library
     linalg.lapack                --- Wrappers to LAPACK library
     misc                         --- Various utilities that don't have
                                      another home.
     ndimage                      --- n-dimensional image package
     odr                          --- Orthogonal Distance Regression
     optimize                     --- Optimization Tools
     signal                       --- Signal Processing Tools
     sparse                       --- Sparse Matrices
     sparse.linalg                --- Sparse Linear Algebra
     sparse.linalg.dsolve         --- Linear Solvers
     sparse.linalg.dsolve.umfpack --- Interface to the UMFPACK library
     sparse.linalg.eigen          --- Sparse Eigenvalue Solvers
     sparse.linalg.eigen.lobpcg   --- Locally Optimal Block Preconditioned
                                      Conjugate Gradient Method (LOBPCG)
     spatial                      --- Spatial data structures and algorithms
     special                      --- Special functions
     stats                        --- Statistical Functions
     
Everything private should be named starting with an underscore as much as possible.

******************scipy tutorial***********************
Scipy builds on Numpy
	In [5]: np.cast['d'](np.pi)
	Out[5]: array(3.14159265)
	In [6]: np.cast['i'](np.pi)
	Out[6]: array(3, dtype=int32)
	In [8]: np.r_[0:3]
	Out[8]: array([0, 1, 2])
	In [21]: np.select([x<0,x>0],[1,x+2])
	Out[21]: array([1, 1, 1, 0, 3, 4])
numerous:许多
 parabolic cylinder: 抛物柱面
 elliptic:椭圆
 derivatives:导数,派生物
 special:
 	数学和物理上的函数,还有底层的统计函数
 integrate:
 	各种积分函数
 	>>> from scipy import integrate
	>>> N = 5
	>>> def f(t, x):  #被积函数
...		    return np.exp(-x*t) / t**N
	>>> integrate.nquad(f, [[1, np.inf],[0, np.inf]])  #两个积分区间
	(0.20000000000002294, 1.2239614263187945e-08)
	
	Gaussian quadrature:高斯求积
	odeint:常微分方程
optimization:最优化
	minimize
	Global (brute-force) optimization routines
	least_squares \ curve_fit
	minimize_scalar\ newton
	root
interpolation:
	1-D interpolation (interp1d)
	Multivariate data interpolation (griddata)
	Spline interpolation
	Using radial basis functions for smoothing/interpolation
	[使用平滑曲线连接各个数据点]
fftpack:
	快速傅里叶变换
signal:
	信号处理工具箱
	一些滤波函数
	数字图像处理中的滤波器:高斯etc
linalg:
	线性代数
	scipy.linalg contains all the functions in numpy.linalg. 
	plus some other more advanced ones not contained in numpy.linalg
	分块对角阵,等矩阵和矩阵操作
scipy.sparse.linalg.eigs:
	包装ARPACK[fortran]
	求解特征值和特征向量
	from scipy.sparse.csgraph import dijkstra
spatial:
	KDTree:
	空间数据结构和算法
statistics:
	统计学:累计分布函数,各种分布scipy.stats.genhalflogistic
	直方图等
ndimage:
	高维图像处理
io
	.sav .mat 文件的存取和转换