# Introduction to numpy¶

NumPy is the fundamental package for scientific computing with Python. It contains among other things:

• a powerful N-dimensional array object
• tools for integrating C/C++ and Fortran code
• useful linear algebra, Fourier transform, and random number capabilities

Besides its obvious scientific uses, NumPy can also be used as an efficient multi-dimensional container of generic data. Arbitrary data-types can be defined. This allows NumPy to seamlessly and speedily integrate with a wide variety of databases.

Library documentation: http://www.numpy.org/

# 1. The base numpy.array object¶

In [1]:
import numpy as np

# declare a vector using a list as the argument
v = np.array([1., 2, 3, 4])
v

Out[1]:
array([1., 2., 3., 4.])
In [2]:
v.shape

Out[2]:
(4,)
In [3]:
v.ndim

Out[3]:
1
In [4]:
v.dtype

Out[4]:
dtype('float64')
In [5]:
w = np.array([1., 2, 3, 4], dtype='int')
w

Out[5]:
array([1, 2, 3, 4])
In [6]:
w.dtype

Out[6]:
dtype('int64')
In [7]:
a = np.arange(100, dtype=np.uint16)

In [8]:
a

Out[8]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],
dtype=uint16)
In [9]:
-3 * a ** 2

Out[9]:
array([     0,     -3,    -12,    -27,    -48,    -75,   -108,   -147,
-192,   -243,   -300,   -363,   -432,   -507,   -588,   -675,
-768,   -867,   -972,  -1083,  -1200,  -1323,  -1452,  -1587,
-1728,  -1875,  -2028,  -2187,  -2352,  -2523,  -2700,  -2883,
-3072,  -3267,  -3468,  -3675,  -3888,  -4107,  -4332,  -4563,
-4800,  -5043,  -5292,  -5547,  -5808,  -6075,  -6348,  -6627,
-6912,  -7203,  -7500,  -7803,  -8112,  -8427,  -8748,  -9075,
-9408,  -9747, -10092, -10443, -10800, -11163, -11532, -11907,
-12288, -12675, -13068, -13467, -13872, -14283, -14700, -15123,
-15552, -15987, -16428, -16875, -17328, -17787, -18252, -18723,
-19200, -19683, -20172, -20667, -21168, -21675, -22188, -22707,
-23232, -23763, -24300, -24843, -25392, -25947, -26508, -27075,
-27648, -28227, -28812, -29403], dtype=int32)
In [10]:
a

Out[10]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],
dtype=uint16)
In [11]:
b = a + 1
b

Out[11]:
array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
92,  93,  94,  95,  96,  97,  98,  99, 100], dtype=uint16)
In [12]:
a is b

Out[12]:
False
In [13]:
a += 1

In [14]:
a

Out[14]:
array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
92,  93,  94,  95,  96,  97,  98,  99, 100], dtype=uint16)

### Warning¶

Beware of the dimensions: a 1D array is not the same as a 2D array with 1 column

In [15]:
a1 = np.array([1, 2, 3])
print(a1, a1.shape, a1.ndim)

[1 2 3] (3,) 1

In [16]:
a2 = np.array([[1, 2, 3]]).T
print(a2, a2.shape, a2.ndim)

[[1]
[2]
[3]] (3, 1) 2

In [17]:
a2.dot(a1)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-17-4e2276e15f5f> in <module>
----> 1 a2.dot(a1)

ValueError: shapes (3,1) and (3,) not aligned: 1 (dim 1) != 3 (dim 0)
In [18]:
# Declare a 2D array using a nested list as the argument
M = np.array([[1,2], [3,4], [3.14, -9.17]])
M

Out[18]:
array([[ 1.  ,  2.  ],
[ 3.  ,  4.  ],
[ 3.14, -9.17]])
In [19]:
M.shape

Out[19]:
(3, 2)
In [20]:
M.size

Out[20]:
6
In [21]:
M.ravel()

Out[21]:
array([ 1.  ,  2.  ,  3.  ,  4.  ,  3.14, -9.17])
In [22]:
M.ndim

Out[22]:
2
In [23]:
# arguments: start, stop, step
x = np.arange(12).reshape(4, 3)
x

Out[23]:
array([[ 0,  1,  2],
[ 3,  4,  5],
[ 6,  7,  8],
[ 9, 10, 11]])
In [24]:
np.linspace(0, 10, 50)

Out[24]:
array([ 0.        ,  0.20408163,  0.40816327,  0.6122449 ,  0.81632653,
1.02040816,  1.2244898 ,  1.42857143,  1.63265306,  1.83673469,
2.04081633,  2.24489796,  2.44897959,  2.65306122,  2.85714286,
3.06122449,  3.26530612,  3.46938776,  3.67346939,  3.87755102,
4.08163265,  4.28571429,  4.48979592,  4.69387755,  4.89795918,
5.10204082,  5.30612245,  5.51020408,  5.71428571,  5.91836735,
6.12244898,  6.32653061,  6.53061224,  6.73469388,  6.93877551,
7.14285714,  7.34693878,  7.55102041,  7.75510204,  7.95918367,
8.16326531,  8.36734694,  8.57142857,  8.7755102 ,  8.97959184,
9.18367347,  9.3877551 ,  9.59183673,  9.79591837, 10.        ])
In [25]:
np.logspace(0, 10, 10, base=np.e, dtype=np.int32)

Out[25]:
array([    1,     3,     9,    28,    85,   258,   785,  2386,  7250,
22026], dtype=int32)
In [26]:
%matplotlib inline
import matplotlib.pyplot as plt

# Random standard Gaussian numbers
wn = np.random.randn(1000)
# Cumsum: Brownian motion
bm = wn.cumsum()

plt.plot(bm)

Out[26]:
[<matplotlib.lines.Line2D at 0x7fe38837f8d0>]
In [27]:
np.diag(np.arange(10))

Out[27]:
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 2, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 3, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 4, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 5, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 6, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 7, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 8, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 9]])
In [28]:
zozo = np.zeros((10, 10))
zozo

Out[28]:
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
In [29]:
zozo.shape

Out[29]:
(10, 10)
In [30]:
print(M)

[[ 1.    2.  ]
[ 3.    4.  ]
[ 3.14 -9.17]]

In [31]:
M[1, 1]

Out[31]:
4.0
In [32]:
# assign new value
M[0, 0] = 7
M[1, :] = -2
M

Out[32]:
array([[ 7.  ,  2.  ],
[-2.  , -2.  ],
[ 3.14, -9.17]])
In [33]:
# Warning: the next m is a **view** on M.
# One again, no copies unless you ask for one!
m = M[0, :]
m[:] = 42
M

Out[33]:
array([[42.  , 42.  ],
[-2.  , -2.  ],
[ 3.14, -9.17]])

# 2 Slicing¶

In [34]:
# slicing works just like with anything else (lists, etc.)
A = np.array([1, 2, 3, 4, 5])
print(A)
print(A[::-1])
print(A[::2])
print(A[:-1])

[1 2 3 4 5]
[5 4 3 2 1]
[1 3 5]
[1 2 3 4]

In [35]:
A = np.array([[n + m * 10 for n in range(5)] for m in range(5)])
A

Out[35]:
array([[ 0,  1,  2,  3,  4],
[10, 11, 12, 13, 14],
[20, 21, 22, 23, 24],
[30, 31, 32, 33, 34],
[40, 41, 42, 43, 44]])
In [36]:
print(A[1:4])

[[10 11 12 13 14]
[20 21 22 23 24]
[30 31 32 33 34]]

In [37]:
print(A[1:4, :])

[[10 11 12 13 14]
[20 21 22 23 24]
[30 31 32 33 34]]

In [38]:
A[1]

Out[38]:
array([10, 11, 12, 13, 14])
In [39]:
A[:, 1]

Out[39]:
array([ 1, 11, 21, 31, 41])
In [40]:
A[:, ::-1]

Out[40]:
array([[ 4,  3,  2,  1,  0],
[14, 13, 12, 11, 10],
[24, 23, 22, 21, 20],
[34, 33, 32, 31, 30],
[44, 43, 42, 41, 40]])
In [41]:
print(A)

[[ 0  1  2  3  4]
[10 11 12 13 14]
[20 21 22 23 24]
[30 31 32 33 34]
[40 41 42 43 44]]

In [42]:
row_indices = [1, 2, 4]
print(A[row_indices])

[[10 11 12 13 14]
[20 21 22 23 24]
[40 41 42 43 44]]

In [43]:
A[:, row_indices]

Out[43]:
array([[ 1,  2,  4],
[11, 12, 14],
[21, 22, 24],
[31, 32, 34],
[41, 42, 44]])

Another way is through masking with an array of bools

In [44]:
# index masking
B = np.arange(5)
row_mask = np.array([True, False, True, False, False])
print(B)

[0 1 2 3 4]
[0 2]


# 3. Copies¶

Don't forget that python does not make copies unless told to do so (same as with any mutable type)

If you are not careful enough, this typically leads to a lot of errors and to being fired !!

In [45]:
x = np.arange(6)
y = x
x[2] = 123
y

Out[45]:
array([  0,   1, 123,   3,   4,   5])
In [46]:
x is y

Out[46]:
True
In [47]:
# A real copy
y = x.copy()

In [48]:
# Or equivalently (but the one above is better...)
y = np.copy(x)

In [49]:
x[0] = -12
print(x, y)

[-12   1 123   3   4   5] [  0   1 123   3   4   5]

In [50]:
x is y

Out[50]:
False

To put values of x in y (copy values into an existing array) use

In [51]:
y[:] = x


### Final warning¶

In the next line you copy the values of x into an existing array y (of same size...)

In [52]:
y[:] = x


While in the next line, you give another name y to the object with name x

(and actually, you should never, never, never write something like this)

In [53]:
y = x


# 4. Miscellaneous stuff¶

### Non-numerical values¶

A numpy array can contain other things than numeric types

In [54]:
arr = np.array(['ou', 'la', 'la', "c'est", 'dur', 'python'])

In [55]:
arr.sum()

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-55-8a56dcf4f91a> in <module>
----> 1 arr.sum()

/anaconda3/lib/python3.7/site-packages/numpy/core/_methods.py in _sum(a, axis, dtype, out, keepdims, initial)
34 def _sum(a, axis=None, dtype=None, out=None, keepdims=False,
35          initial=_NoValue):
---> 36     return umr_sum(a, axis, dtype, out, keepdims, initial)
37
38 def _prod(a, axis=None, dtype=None, out=None, keepdims=False,

TypeError: cannot perform reduce with flexible type
In [56]:
arr.dtype

Out[56]:
dtype('<U6')

### A matrix is no 2D array in numpy¶

We've been using only array or ndarray objects for now

The is another type: the matrix type

In summary: just don't use it (IMO) and stick with arrays

In [57]:
# Matrix VS array objects in numpy
m1 = np.matrix(np.arange(3))
m2 = np.matrix(np.arange(3))

In [58]:
a1 = np.arange(3)
a2 = np.arange(3)

In [59]:
m1 * m2

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-59-8448bc335613> in <module>
----> 1 m1 * m2

/anaconda3/lib/python3.7/site-packages/numpy/matrixlib/defmatrix.py in __mul__(self, other)
218         if isinstance(other, (N.ndarray, list, tuple)) :
219             # This promotes 1-D vectors to row vectors
--> 220             return N.dot(self, asmatrix(other))
221         if isscalar(other) or not hasattr(other, '__rmul__') :
222             return N.dot(self, other)

ValueError: shapes (1,3) and (1,3) not aligned: 3 (dim 1) != 1 (dim 0)
In [60]:
a1 * a2

Out[60]:
array([0, 1, 4])

### Sparse matrices¶

In [61]:
from scipy.sparse import csc_matrix, csr_matrix, coo_matrix

probs = np.full(fill_value=1/4, shape=(4,))
X = np.random.multinomial(n=2, pvals=probs, size=4)
X

Out[61]:
array([[1, 0, 0, 1],
[1, 0, 0, 1],
[0, 1, 1, 0],
[1, 0, 1, 0]])
In [62]:
X_coo = coo_matrix(X)
print(X_coo)
X_coo

  (0, 0)	1
(0, 3)	1
(1, 0)	1
(1, 3)	1
(2, 1)	1
(2, 2)	1
(3, 0)	1
(3, 2)	1

Out[62]:
<4x4 sparse matrix of type '<class 'numpy.int64'>'
with 8 stored elements in COOrdinate format>
In [63]:
X_coo.nnz

Out[63]:
8
In [64]:
print(X, end='\n----\n')
print(X_coo.data, end='\n----\n')
print(X_coo.row, end='\n----\n')
print(X_coo.col, end='\n----\n')

[[1 0 0 1]
[1 0 0 1]
[0 1 1 0]
[1 0 1 0]]
----
[1 1 1 1 1 1 1 1]
----
[0 0 1 1 2 2 3 3]
----
[0 3 0 3 1 2 0 2]
----


There is also

• csr_matrix: sparse rows format
• csc_matrix: sparse columns format

Sparse rows is often used for machine learning: sparse features vectors

But sparse column format useful as well (e.g. coordinate gradient descent)

### Bored of seing too many decimals?¶

In [65]:
X = np.random.randn(5, 5)
X

Out[65]:
array([[-0.63036028,  1.11021343,  0.99621098,  0.21559518,  0.88376054],
[-0.25014246, -0.12628093,  0.44240165, -0.27955855, -0.91122102],
[ 0.31558855, -0.58863629,  0.89410682,  0.75962722, -0.78295019],
[-1.10266165, -1.01002557, -1.4394586 ,  1.45987116, -0.04861532],
[-0.87833319,  1.55374192, -1.21265885,  1.43595696,  0.9805238 ]])
In [66]:
# All number displayed by numpy (in the current kernel) are with 3 decimals max
np.set_printoptions(precision=3)
print(X)
np.set_printoptions(precision=8)

[[-0.63   1.11   0.996  0.216  0.884]
[-0.25  -0.126  0.442 -0.28  -0.911]
[ 0.316 -0.589  0.894  0.76  -0.783]
[-1.103 -1.01  -1.439  1.46  -0.049]
[-0.878  1.554 -1.213  1.436  0.981]]


### Not limited to 2D!¶

numpy arrays can have any number of dimension (hence the name ndarray)

In [67]:
X = np.arange(18).reshape(3, 2, 3)
X

Out[67]:
array([[[ 0,  1,  2],
[ 3,  4,  5]],

[[ 6,  7,  8],
[ 9, 10, 11]],

[[12, 13, 14],
[15, 16, 17]]])
In [68]:
X.shape

Out[68]:
(3, 2, 3)
In [69]:
X.ndim

Out[69]:
3

# 5. Aggregations and statistics¶

In [70]:
A = np.arange(42).reshape(7, 6)
A

Out[70]:
array([[ 0,  1,  2,  3,  4,  5],
[ 6,  7,  8,  9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35],
[36, 37, 38, 39, 40, 41]])
In [71]:
A.sum()

Out[71]:
861
In [72]:
A[:, 3].mean()

Out[72]:
21.0
In [73]:
A.mean(axis=0)

Out[73]:
array([18., 19., 20., 21., 22., 23.])
In [74]:
A.mean(axis=1)

Out[74]:
array([ 2.5,  8.5, 14.5, 20.5, 26.5, 32.5, 38.5])
In [75]:
A[:,3].std(), A[:,3].var()

Out[75]:
(12.0, 144.0)
In [76]:
A[:,3].min(), A[:,3].max()

Out[76]:
(3, 39)
In [77]:
A.cumsum(axis=1)

Out[77]:
array([[  0,   1,   3,   6,  10,  15],
[  6,  13,  21,  30,  40,  51],
[ 12,  25,  39,  54,  70,  87],
[ 18,  37,  57,  78, 100, 123],
[ 24,  49,  75, 102, 130, 159],
[ 30,  61,  93, 126, 160, 195],
[ 36,  73, 111, 150, 190, 231]])
In [78]:
# sum of diagonal
A.trace()

Out[78]:
105

# 6. Linear Algebra¶

In [79]:
A = np.arange(30).reshape(6, 5)
v1 = np.arange(0, 5)
v2 = np.arange(5, 10)

In [80]:
v1

Out[80]:
array([0, 1, 2, 3, 4])
In [81]:
v2

Out[81]:
array([5, 6, 7, 8, 9])
In [82]:
v1 * v2

Out[82]:
array([ 0,  6, 14, 24, 36])

### Inner products¶

In [83]:
# Inner product between vectors
print(v1.dot(v2))

# You can use also (but first solution is better)
print(np.dot(v1, v2))

80
80

In [84]:
A, v1

Out[84]:
(array([[ 0,  1,  2,  3,  4],
[ 5,  6,  7,  8,  9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24],
[25, 26, 27, 28, 29]]), array([0, 1, 2, 3, 4]))
In [85]:
A.shape, v1.shape

Out[85]:
((6, 5), (5,))
In [86]:
# Matrix-vector inner product
A.dot(v1)

Out[86]:
array([ 30,  80, 130, 180, 230, 280])
In [87]:
# Transpose
A.T

Out[87]:
array([[ 0,  5, 10, 15, 20, 25],
[ 1,  6, 11, 16, 21, 26],
[ 2,  7, 12, 17, 22, 27],
[ 3,  8, 13, 18, 23, 28],
[ 4,  9, 14, 19, 24, 29]])
In [88]:
print(v1)
# Inline operations (same for *=, /=, -=)
v1 += 2

[0 1 2 3 4]


### Linear systems¶

In [89]:
A = np.array([[1,2,3], [4,5,6], [7,8,9]])
b = np.array([1,2,3])
print(A, b, sep=2 * '\n')

[[1 2 3]
[4 5 6]
[7 8 9]]

[1 2 3]

In [90]:
# solve a system of linear equations
x = np.linalg.solve(A, b)
x

Out[90]:
array([-0.23333333,  0.46666667,  0.1       ])
In [91]:
A.dot(x)

Out[91]:
array([1., 2., 3.])

### Eigenvalues and eigenvectors¶

In [92]:
A = np.random.rand(3,3)
B = np.random.rand(3,3)

evals, evecs = np.linalg.eig(A)
evals

Out[92]:
array([ 1.81812997, -0.37929675,  0.19575061])
In [93]:
evecs

Out[93]:
array([[ 0.44296158,  0.59591516,  0.44101265],
[ 0.56209248,  0.53654506, -0.89735991],
[ 0.69845335, -0.59749855, -0.01590714]])

### Singular value decomposition (SVD)¶

Decomposes any matrix $A \in \mathbb R^{m \times n}$ as follows: $$A = U S V^\top$$ where

• $U$ and $V$ are orthonormal matrices (meaning that $U^\top U = I$ and $V^\top V = I$)
• $S$ is a diagonal matrix that contains the singular values in decreasing order
In [94]:
print(A)
U, S, V = np.linalg.svd(A)

[[0.39051812 0.08082301 0.84035274]
[0.33170512 0.34143184 0.97802995]
[0.89887386 0.42922605 0.90263387]]

In [95]:
U.dot(np.diag(S)).dot(V)

Out[95]:
array([[0.39051812, 0.08082301, 0.84035274],
[0.33170512, 0.34143184, 0.97802995],
[0.89887386, 0.42922605, 0.90263387]])
In [96]:
# U and V are indeed orthonormal
np.set_printoptions(precision=2)
print(U.T.dot(U), V.T.dot(V), sep=2 * '\n')
np.set_printoptions(precision=8)

[[ 1.00e+00  1.91e-17 -1.84e-16]
[ 1.91e-17  1.00e+00  6.52e-17]
[-1.84e-16  6.52e-17  1.00e+00]]

[[1.00e+00 2.83e-17 1.99e-16]
[2.83e-17 1.00e+00 4.37e-17]
[1.99e-16 4.37e-17 1.00e+00]]


### Exercice: the racoon SVD¶

• Load the racoon face picture using scipy.misc.face()
• Visualize the picture
• Write a function which reshapes the picture into a 2D array, and computes the best rank-r approximation of it (the prototype of the function is compute_approx(X, r)
• Display the different approximations for r between 5 and 100
In [97]:
import numpy as np
from scipy.misc import face
import matplotlib.pyplot as plt
%matplotlib inline

X = face()
plt.imshow(X)
_ = plt.axis('off')

In [98]:
def compute_approx(X: np.ndarray, r: int):
"""Computes the best rank-r approximation of X using SVD.
We expect X to the 3D array corresponding to a color image, that we
reduce to a 2D one to apply SVD (no broadcasting).

Parameters
----------
X : np.ndarray, shape=(n_rows, n_cols, 3)
The input 3D ndarray

r : int
The desired rank

Return
------
output : np.ndarray, shape=(n_rows, n_cols, 3)
The best rank-r approximation of X
"""
n_rows, n_cols, n_channels = X.shape
# Reshape X to a 2D array
X_reshape = X.reshape(n_rows, n_cols * n_channels)
# Compute SVD
U, S, V = np.linalg.svd(X_reshape, full_matrices=False)
# Keep only the top r first singular values
S[r:] = 0
# Compute the approximation
X_reshape_r = U.dot(np.diag(S)).dot(V)
# Put it between 0 and 255 again and cast to integer type
return X_reshape_r.clip(min=0, max=255).astype('int')\
.reshape(n_rows, n_cols, n_channels)

In [99]:
ranks = [100, 70, 50, 30, 10, 5]
n_ranks = len(ranks)
for i, r in enumerate(ranks):
X_r = compute_approx(X, r)
# plt.subplot(n_ranks, 1, i + 1)
plt.figure(figsize=(5, 5))
plt.imshow(X_r)
_ = plt.axis('off')
plt.title('Rank %d approximation of the racoon' % r, fontsize=16)
plt.tight_layout()