Fundamental Modules for Statistical Modelling¶

Feng Li

School of Statistics and Mathematics

Central University of Finance and Economics

feng.li@cufe.edu.cn

https://feng.li/python

Install different versions of Python modules¶

  • What if you want to install an application and leave it be? If an application works, any change in its libraries or in the versions of those libraries can break it. And what if you cannot install packages into the global site-packages directory because you do not have permission to change the host Python environment?

  • Sometimes it is useful to have different versions of Python modules installed in different places. A quick solution is to install Python packages into the current working directory.

In [19]:
! pip3 install pandas==1.2.1 -I -t .
Looking in indexes: https://mirrors.163.com/pypi/simple/
Collecting pandas==1.2.1
  Using cached https://mirrors.163.com/pypi/packages/c9/56/f415b4148622f469263ad2ece8bdf757972e94ffc97cb750dd8b79b04d43/pandas-1.2.1-cp39-cp39-manylinux1_x86_64.whl (9.7 MB)
Collecting pytz>=2017.3
  Using cached https://mirrors.163.com/pypi/packages/70/94/784178ca5dd892a98f113cdd923372024dc04b8d40abe77ca76b5fb90ca6/pytz-2021.1-py2.py3-none-any.whl (510 kB)
Collecting numpy>=1.16.5
  Using cached https://mirrors.163.com/pypi/packages/7a/4c/dd00ce768b0f0f7de5c486cbd9f5b922bc3af2f3a5da30121d7f7dc03130/numpy-1.21.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.8 MB)
Collecting python-dateutil>=2.7.3
  Using cached https://mirrors.163.com/pypi/packages/d4/70/d60450c3dd48ef87586924207ae8907090de0b306af2bce5d134d78615cb/python_dateutil-2.8.1-py2.py3-none-any.whl (227 kB)
Collecting six>=1.5
  Using cached https://mirrors.163.com/pypi/packages/ee/ff/48bde5c0f013094d729fe4b0316ba2a24774b3ff1c52d924a8a4cb04078a/six-1.15.0-py2.py3-none-any.whl (10 kB)
Installing collected packages: six, pytz, python-dateutil, numpy, pandas
Successfully installed numpy-1.21.2 pandas-1.2.1 python-dateutil-2.8.1 pytz-2021.1 six-1.15.0
In [21]:
import pandas
pandas.__version__
Out[21]:
'1.2.1'
  • In all these cases, virtualenv can help you. It creates an environment that has its own installation directories and that does not share libraries with other virtualenv environments (and optionally does not access the globally installed libraries either), as sketched below.
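A minimal sketch of that workflow, run from a terminal and using the built-in venv module (the environment name statsenv is arbitrary):

python3 -m venv statsenv             # create an isolated environment
source statsenv/bin/activate         # activate it (on Windows: statsenv\Scripts\activate)
pip install pandas==1.2.1            # packages now install into statsenv only
deactivate                           # leave the environment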

Python modules for Statistics¶

NumPy¶

NumPy, short for Numerical Python, is the foundational package for scientific computing in Python. It contains, among other things:

  • a powerful N-dimensional array object
  • sophisticated (broadcasting) functions
  • tools for integrating C/C++ and Fortran code
  • useful linear algebra, Fourier transform, and random number capabilities

Besides its obvious scientific uses, NumPy can also be used as an efficient multi-dimensional container of generic data. Arbitrary data-types can be defined. This allows NumPy to seamlessly and speedily integrate with a wide variety of databases.
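
As a minimal sketch of the first two points (the array values below are arbitrary):

import numpy as np
x = np.arange(12).reshape(3, 4)    # an N-dimensional (here 2-D) array object
w = np.array([1.0, 10.0, 100.0, 1000.0])
x * w                              # broadcasting: w is applied across each row of x
x.mean(axis=0)                     # column means, computed in compiled code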

  • NumPy Reference
  • NumPy User Guide

SciPy¶

SciPy is a collection of packages addressing a number of different standard problem domains in scientific computing. Here is a sampling of the included packages (a brief example follows the list):

  • scipy.integrate : numerical integration routines and differential equation solvers.
  • scipy.linalg : linear algebra routines and matrix decompositions extending beyond those provided in numpy.linalg.
  • scipy.optimize : function optimizers (minimizers) and root finding algorithms.
  • scipy.signal : signal processing tools.
  • scipy.sparse : sparse matrices and sparse linear system solvers.
  • scipy.special : wrapper around SPECFUN, a Fortran library implementing many common mathematical functions, such as the gamma function.
  • scipy.stats : standard continuous and discrete probability distributions (density functions, samplers, continuous distribution functions), various statistical tests, and more descriptive statistics.
  • ...

SciPy Reference Guide
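
As a brief illustration of two of these subpackages (the integrand below is an arbitrary example):

import numpy as np
from scipy import integrate, special
val, err = integrate.quad(lambda x: np.exp(-x**2), 0, np.inf)  # numerical integration
val                       # approximately sqrt(pi)/2
special.gamma(0.5)        # the gamma function; Gamma(1/2) = sqrt(pi)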

Linear Algebra¶

Linear algebra can be done conveniently via scipy.linalg. When SciPy is built using the optimized ATLAS LAPACK and BLAS libraries, it has very fast linear algebra capabilities. If you dig deep enough, all of the raw LAPACK and BLAS routines are available for even more speed. In this section, some easier-to-use interfaces to these routines are described.

All of these linear algebra routines expect an object that can be converted into a 2-dimensional array. The output of these routines is also a two-dimensional array.

Matrices and n-dimensional arrays¶

In [22]:
import numpy as np
from scipy import linalg
A = np.array([[1,2],[3,4]])
A
Out[22]:
array([[1, 2],
       [3, 4]])
In [23]:
linalg.inv(A) # inverse of a matrix
Out[23]:
array([[-2. ,  1. ],
       [ 1.5, -0.5]])
In [24]:
b = np.array([[5,6]]) #2D array
b
Out[24]:
array([[5, 6]])
In [25]:
b.T
Out[25]:
array([[5],
       [6]])
In [26]:
A*b #not matrix multiplication!
Out[26]:
array([[ 5, 12],
       [15, 24]])
In [27]:
A.dot(b.T) #matrix multiplication
Out[27]:
array([[17],
       [39]])
In [28]:
b = np.array([5,6]) #1D array
b
Out[28]:
array([5, 6])
In [29]:
b.T  #not matrix transpose!
Out[29]:
array([5, 6])
In [30]:
A.dot(b)  #orientation does not matter for a 1-D array
Out[30]:
array([17, 39])
  • NumPy arrays can easily be converted to pandas DataFrames, and vice versa.
In [58]:
import pandas as pd
A = np.array([[1,2,3],[4,5,6]])
Ap = pd.DataFrame(A)
Ap
Out[58]:
   0  1  2
0  1  2  3
1  4  5  6
In [60]:
A2 = Ap.values
A2
Out[60]:
array([[1, 2, 3],
       [4, 5, 6]])
In [61]:
type(A2)
Out[61]:
numpy.ndarray
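
In recent pandas versions, DataFrame.to_numpy() is the recommended way to obtain the underlying array; for a purely numeric frame it is equivalent to .values:

A3 = Ap.to_numpy()   # same result as Ap.values here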

Solving linear systems¶

In [15]:
import numpy as np
from scipy import linalg
A = np.array([[1,2],[3,4]])
A
Out[15]:
array([[1, 2],
       [3, 4]])
In [32]:
b = np.array([[5],[6]])
b
Out[32]:
array([[5],
       [6]])
In [31]:
linalg.inv(A).dot(b) #slow
Out[31]:
array([[-4. ],
       [ 4.5]])
In [33]:
A.dot(linalg.inv(A).dot(b))-b #check
Out[33]:
array([[0.],
       [0.]])
In [34]:
np.linalg.solve(A,b) #fast
Out[34]:
array([[-4. ],
       [ 4.5]])
In [35]:
A.dot(np.linalg.solve(A,b))-b #check
Out[35]:
array([[0.],
       [0.]])

Determinant¶

In [36]:
import numpy as np
from scipy import linalg
A = np.array([[1,2],[3,4]])
linalg.det(A)
Out[36]:
-2.0

Least-squares problems and pseudo-inverses¶

In [37]:
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt
In [38]:
c1, c2 = 5.0, 2.0
i = np.r_[1:11]
xi = 0.1*i
yi = c1*np.exp(-xi) + c2*xi
zi = yi + 0.05 * np.max(yi) * np.random.randn(len(yi))
In [39]:
A = np.c_[np.exp(-xi)[:, np.newaxis], xi[:, np.newaxis]]
c, resid, rank, sigma = linalg.lstsq(A, zi)
In [40]:
xi2 = np.r_[0.1:1.0:100j]
yi2 = c[0]*np.exp(-xi2) + c[1]*xi2
In [42]:
plt.plot(xi,zi,'x',xi2,yi2)
plt.axis([0,1.1,3.0,5.5])
plt.xlabel('$x_i$') # LaTeX symbols are also supported
plt.title('Data fitting with linalg.lstsq')
plt.show()

Eigenvalues and eigenvectors¶

In [43]:
import numpy as np
from scipy import linalg
A = np.array([[1,2],[3,4]])
la,v = linalg.eig(A)
l1,l2 = la
print(l1, l2)  #eigenvalues

print(v[:,0])  #first eigenvector

print(v[:,1])  #second eigenvector

print(np.sum(abs(v**2),axis=0)) #each eigenvector has unit norm

v1 = np.array(v[:,0]).T
print(linalg.norm(A.dot(v1)-l1*v1)) #check the computation
(-0.3722813232690143+0j) (5.372281323269014+0j)
[-0.82456484  0.56576746]
[-0.41597356 -0.90937671]
[1. 1.]
5.551115123125783e-17

Singular Value Decomposition (SVD)¶

In [44]:
import numpy as np
from scipy import linalg
A = np.array([[1,2,3],[4,5,6]])
In [45]:
M,N = A.shape
U,s,Vh = linalg.svd(A)
Sig = linalg.diagsvd(s,M,N)
In [46]:
U
Out[46]:
array([[-0.3863177 ,  0.92236578],
       [-0.92236578, -0.3863177 ]])
In [47]:
Sig
Out[47]:
array([[9.508032  , 0.        , 0.        ],
       [0.        , 0.77286964, 0.        ]])
In [48]:
Vh
Out[48]:
array([[-0.42866713, -0.56630692, -0.7039467 ],
       [-0.80596391, -0.11238241,  0.58119908],
       [ 0.40824829, -0.81649658,  0.40824829]])
In [49]:
U.dot(Sig.dot(Vh)) #check computation
Out[49]:
array([[1., 2., 3.],
       [4., 5., 6.]])

QR decomposition¶

The command for QR decomposition is linalg.qr.
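
A brief sketch with the 2 x 2 matrix used above:

import numpy as np
from scipy import linalg
A = np.array([[1,2],[3,4]])
Q, R = linalg.qr(A)         # A = Q R, with Q orthogonal and R upper triangular
np.allclose(Q.dot(R), A)    # check the factorization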

LU decomposition¶

The SciPy command for this decomposition is linalg.lu. If the intent of performing the LU decomposition is to solve linear systems, then the command linalg.lu_factor should be used, followed by repeated applications of linalg.lu_solve to solve the system for each new right-hand side, as in the sketch below.
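
A minimal sketch of that pattern (the right-hand sides below are arbitrary):

import numpy as np
from scipy import linalg
A = np.array([[1,2],[3,4]])
lu, piv = linalg.lu_factor(A)                        # factor once
x1 = linalg.lu_solve((lu, piv), np.array([5, 6]))    # reuse the factorization
x2 = linalg.lu_solve((lu, piv), np.array([7, 8]))    # ... for each new right-hand side
np.allclose(A.dot(x1), [5, 6])                       # check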

Cholesky decomposition¶

The command linalg.cholesky computes the Cholesky factorization. To use the Cholesky factorization to solve systems of equations, there are also linalg.cho_factor and linalg.cho_solve routines that work similarly to their LU decomposition counterparts.
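
A minimal sketch (the symmetric positive definite matrix below is an arbitrary example):

import numpy as np
from scipy import linalg
S = np.array([[4.0, 2.0], [2.0, 3.0]])                  # symmetric positive definite
L = linalg.cholesky(S, lower=True)                      # S = L L^T
np.allclose(L.dot(L.T), S)                              # check the factorization
c, low = linalg.cho_factor(S)
x = linalg.cho_solve((c, low), np.array([1.0, 2.0]))    # solve S x = b
np.allclose(S.dot(x), [1.0, 2.0])                       # check the solution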

Statistical Distributions¶

A large number of probability distributions as well as a growing library of statistical functions are available in scipy.stats. See http://docs.scipy.org/doc/scipy/reference/stats.html for a complete list.

  • Generate random numbers from the normal distribution:
In [34]:
from scipy.stats import norm
r = norm.rvs(loc=0, scale=1, size=1000)
  • Calculate the first few moments:
In [35]:
mean, var, skew, kurt = norm.stats(moments='mvsk')
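
The distribution objects also expose the density, distribution, and quantile functions; for the standard normal, for example:

from scipy.stats import norm
norm.pdf(0)        # density at 0, about 0.3989
norm.cdf(1.96)     # distribution function, about 0.975
norm.ppf(0.975)    # quantile (inverse cdf), about 1.96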

Linear regression model¶

This example computes a least-squares regression for two sets of measurements.

In [50]:
from scipy import stats
import numpy as np
x = np.random.random(10)
y = np.random.random(10)
slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)
print({'slope':slope,'intercept':intercept})
print({'p_value':p_value,'r-squared':round(r_value**2,2)})
{'slope': 0.0016389519609387667, 'intercept': 0.5367248508312181}
{'p_value': 0.9950296225204127, 'r-squared': 0.0}

Optimization¶

The minimize function provides a common interface to unconstrained and constrained minimization algorithms for multivariate scalar functions in scipy.optimize.

In [52]:
import numpy as np
from scipy.optimize import minimize

def rosen(x):
    """The Rosenbrock function"""
    return sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)
In [64]:
x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])

## Calling the minimize() function
res = minimize(rosen, x0, method='nelder-mead',
               options={'xatol': 1e-8, 'disp': True})  # 'xatol' is the Nelder-Mead tolerance on x
print(res.x)
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 339
         Function evaluations: 571
[1. 1. 1. 1. 1.]
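
The same interface accepts gradient-based methods. A brief sketch using BFGS with SciPy's built-in Rosenbrock function and its analytic gradient:

import numpy as np
from scipy.optimize import minimize, rosen, rosen_der
x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
res = minimize(rosen, x0, method='BFGS', jac=rosen_der)   # supply the gradient explicitly
res.x                                                     # should again be close to [1, 1, 1, 1, 1]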