Basics

Python

The numpy library provides the basis for numerical multidimensional arrays. xarray provides additional features that allows for numpy arrays with pandas-like named dimensions and therefore very efficient representation of netCDF data and similar geospatial information.

R

All basic data in R is always already a vector (i.e. a 1-dimensional array). Two dimensional number arrays in R are called matrix but the array data type provides more general multi-dimensional data options similar to numpy in python. However, the more common approach in R is to use data_frame tabular data that can be nested (tidyr’s nest) and unnested (tidyr’s unnest) to provide higher dimmensional data that’s still compatible with dplyr and ggplot operations.

Create arrays

Python

# python
import numpy as np # import numpy
# basic numpy array (by default initialized as float)
a = np.array([3.14, 60, 17])
print(a)

## [ 3.14 60.   17.  ]

print(type(a[0]))
# specify integer arrays if desired

## <class 'numpy.float64'>

b = np.array([1, 2, 3], dtype=int)
print(b)

## [1 2 3]

print(type(b[0]))

## <class 'numpy.int64'>

# basic vector
a <- c(3.14, 60, 17) 
str(a)

##  num [1:3] 3.14 60 17

# integer vector
b <- c(1L, 2L, 3L) 
str(b)

##  int [1:3] 1 2 3

# special case for easy creation of sequential integer vectors
b2 <- 1:5
str(b2)

##  int [1:5] 1 2 3 4 5

# python
# generate a basic array of zeros
z = np.zeros((2, 5, 10)) # 2 x 5 x 10 array
print(z)
# generate an array of ones with same dimension

## [[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
##   [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
##   [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
##   [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
##   [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
## 
##  [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
##   [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
##   [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
##   [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
##   [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]]

z2 = np.ones_like(z)
print(z2)
# range of 100 values from 0.1 to 10 reshaped into a 10x10 array

## [[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
##   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
##   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
##   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
##   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
## 
##  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
##   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
##   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
##   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
##   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]]

z3 = np.linspace(0.1,10,100).reshape(2, 5, 10)
print(z3)

## [[[ 0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9  1. ]
##   [ 1.1  1.2  1.3  1.4  1.5  1.6  1.7  1.8  1.9  2. ]
##   [ 2.1  2.2  2.3  2.4  2.5  2.6  2.7  2.8  2.9  3. ]
##   [ 3.1  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9  4. ]
##   [ 4.1  4.2  4.3  4.4  4.5  4.6  4.7  4.8  4.9  5. ]]
## 
##  [[ 5.1  5.2  5.3  5.4  5.5  5.6  5.7  5.8  5.9  6. ]
##   [ 6.1  6.2  6.3  6.4  6.5  6.6  6.7  6.8  6.9  7. ]
##   [ 7.1  7.2  7.3  7.4  7.5  7.6  7.7  7.8  7.9  8. ]
##   [ 8.1  8.2  8.3  8.4  8.5  8.6  8.7  8.8  8.9  9. ]
##   [ 9.1  9.2  9.3  9.4  9.5  9.6  9.7  9.8  9.9 10. ]]]

R

# R
# notice that although the dimensions are the same size, the way the arrays are populated is quite different!
str(py$z3) # array coming from python

##  num [1:2, 1:5, 1:10] 0.1 5.1 1.1 6.1 2.1 7.1 3.1 8.1 4.1 9.1 ...

z3 <- array(seq(0.1, 100, by = 0.1), dim = c(2, 5, 10))
str(z3) # "same" array created in R

##  num [1:2, 1:5, 1:10] 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 ...

# default array values - either e.g. a sequence as illustrated above or single default
str(array(NA_real_, dim = c(2, 5, 10))) # not available as default

##  num [1:2, 1:5, 1:10] NA NA NA NA NA NA NA NA NA NA ...

str(z <- array(0L, dim = c(2, 5, 10))) # 0 integer as default

##  int [1:2, 1:5, 1:10] 0 0 0 0 0 0 0 0 0 0 ...

str(z2 <- array(1, dim = c(2, 5, 10))) # 1.0 numeric (=float) as default

##  num [1:2, 1:5, 1:10] 1 1 1 1 1 1 1 1 1 1 ...

# note that arrays in R can also have named dimensions
zdim <- array(1:100, dim = c(2, 5, 10), dimnames = list(x = str_c("x", 1:2), y = str_c("y", 1:5), z = str_c("z", 1:10)))
str(zdim)

##  int [1:2, 1:5, 1:10] 1 2 3 4 5 6 7 8 9 10 ...
##  - attr(*, "dimnames")=List of 3
##   ..$ x: chr [1:2] "x1" "x2"
##   ..$ y: chr [1:5] "y1" "y2" "y3" "y4" ...
##   ..$ z: chr [1:10] "z1" "z2" "z3" "z4" ...

print(zdim)

## , , z = z1
## 
##     y
## x    y1 y2 y3 y4 y5
##   x1  1  3  5  7  9
##   x2  2  4  6  8 10
## 
## , , z = z2
## 
##     y
## x    y1 y2 y3 y4 y5
##   x1 11 13 15 17 19
##   x2 12 14 16 18 20
## 
## , , z = z3
## 
##     y
## x    y1 y2 y3 y4 y5
##   x1 21 23 25 27 29
##   x2 22 24 26 28 30
## 
## , , z = z4
## 
##     y
## x    y1 y2 y3 y4 y5
##   x1 31 33 35 37 39
##   x2 32 34 36 38 40
## 
## , , z = z5
## 
##     y
## x    y1 y2 y3 y4 y5
##   x1 41 43 45 47 49
##   x2 42 44 46 48 50
## 
## , , z = z6
## 
##     y
## x    y1 y2 y3 y4 y5
##   x1 51 53 55 57 59
##   x2 52 54 56 58 60
## 
## , , z = z7
## 
##     y
## x    y1 y2 y3 y4 y5
##   x1 61 63 65 67 69
##   x2 62 64 66 68 70
## 
## , , z = z8
## 
##     y
## x    y1 y2 y3 y4 y5
##   x1 71 73 75 77 79
##   x2 72 74 76 78 80
## 
## , , z = z9
## 
##     y
## x    y1 y2 y3 y4 y5
##   x1 81 83 85 87 89
##   x2 82 84 86 88 90
## 
## , , z = z10
## 
##     y
## x    y1 y2 y3 y4  y5
##   x1 91 93 95 97  99
##   x2 92 94 96 98 100

Structure information

Python

# python
# get shape and size information
print(np.shape(a)) # dimensions

## (3,)

print(np.size(a)) # total # elements

## 3

print(np.shape(z)) # dimensions

## (2, 5, 10)

print(np.size(z)) # total # elements
# reshaping into different dimensions

## 100

print(z3.reshape(20,5))

## [[ 0.1  0.2  0.3  0.4  0.5]
##  [ 0.6  0.7  0.8  0.9  1. ]
##  [ 1.1  1.2  1.3  1.4  1.5]
##  [ 1.6  1.7  1.8  1.9  2. ]
##  [ 2.1  2.2  2.3  2.4  2.5]
##  [ 2.6  2.7  2.8  2.9  3. ]
##  [ 3.1  3.2  3.3  3.4  3.5]
##  [ 3.6  3.7  3.8  3.9  4. ]
##  [ 4.1  4.2  4.3  4.4  4.5]
##  [ 4.6  4.7  4.8  4.9  5. ]
##  [ 5.1  5.2  5.3  5.4  5.5]
##  [ 5.6  5.7  5.8  5.9  6. ]
##  [ 6.1  6.2  6.3  6.4  6.5]
##  [ 6.6  6.7  6.8  6.9  7. ]
##  [ 7.1  7.2  7.3  7.4  7.5]
##  [ 7.6  7.7  7.8  7.9  8. ]
##  [ 8.1  8.2  8.3  8.4  8.5]
##  [ 8.6  8.7  8.8  8.9  9. ]
##  [ 9.1  9.2  9.3  9.4  9.5]
##  [ 9.6  9.7  9.8  9.9 10. ]]

R

# R
# get shape and size information
print(dim(a)) # dimension is NOT defined for a vector

## NULL

print(length(a)) # total # elements

## [1] 3

print(dim(z)) # dimensions of array

## [1]  2  5 10

print(length(z)) # total # elements

## [1] 100

# reshaping into different dimensions
z3_copy <- z3
str(z3_copy)

##  num [1:2, 1:5, 1:10] 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 ...

dim(z3_copy) <- c(20, 5)
str(z3_copy)

##  num [1:20, 1:5] 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 ...

Selectors

Python

# python
# select subsets of the array by 0-based index
print(a)

## [ 3.14 60.   17.  ]

print(a[0]) # index from beginning

## 3.14

print(a[0:2]) # range (EXCLUSIVE end index)

## [ 3.14 60.  ]

print(a[-2]) # index from end

## 60.0

print(a[-2:]) # range till end
# same for multidimensional arrays, each dimension as separate index

## [60. 17.]

print(z3[0:2,3:,1:4])

## [[[3.2 3.3 3.4]
##   [4.2 4.3 4.4]]
## 
##  [[8.2 8.3 8.4]
##   [9.2 9.3 9.4]]]

R

# R
# quite different from python! indexing is 1-based and the end index is INCLUSIVE, also the -x syntax means exclude, not count from the end
print(a)

## [1]  3.14 60.00 17.00

print(a[1]) # index from beginning (1-base)

## [1] 3.14

print(a[1:2]) # range (INCLUSIVE end index)

## [1]  3.14 60.00

print(tail(a, 2)) # get as indexed from back

## [1] 60 17

print(a[-2]) # EXCLUDE this index

## [1]  3.14 17.00

print(a[-c(2:3)]) # EXCLUDE range

## [1] 3.14

# same for multidimensional arrays, each dimension as separate index but 1-based
str(z3[1:2,3:4,2:4])

##  num [1:2, 1:2, 1:3] 1.5 1.6 1.7 1.8 2.5 2.6 2.7 2.8 3.5 3.6 ...

Filtering

Python

# python
# use where for filtering
[i] = np.where(a < 20) # get indices (0-based as always)
print(i)

## [0 2]

print(a[i])

## [ 3.14 17.  ]

print(a[np.where(a < 20)]) # directly
# same for multidimensional arrays except multiple indices are returned

## [ 3.14 17.  ]

[i,j,k] = np.where(z3 > 9.5) # store indices
print(z3[i,j,k])

## [ 9.6  9.7  9.8  9.9 10. ]

print(z3[np.where(z3 > 9.5)]) # directly

## [ 9.6  9.7  9.8  9.9 10. ]

R

# R
i <- which(a < 20) # get indices (1-based as always)
print(i)

## [1] 1 3

print(a[i])

## [1]  3.14 17.00

print(a[a < 20]) # directly

## [1]  3.14 17.00

# same for multidimensional arrays
is <- which(z3 > 9.5) # different to python! single index vector returned (array is still just a vector underneath)
print(z3[is]) # consequently selected values no longer have their dimensionality!

## [1]  9.6  9.7  9.8  9.9 10.0

print(z3[z3 > 9.5]) # directly

## [1]  9.6  9.7  9.8  9.9 10.0

R alternative: switch to data frames

# R: alternative
# this is where conversion to data frame becomes more and more useful
array_to_df <- function(array) { 
  array %>% 
    # convert array to table (+ to dplyr data frame)
    as.data.frame.table() %>% tbl_df() %>%
    # switch to index instead of named dimensions
    mutate_if(is.factor, as.integer) %>% 
    return()
}
  
# convert array to data frame (note: column renaming is optional)
z3_df <- z3 %>% array_to_df() %>% rename(x = Var1, y = Var2, z = Var3, value = Freq)
z2_df <- z2 %>% array_to_df() %>% rename(x = Var1, y = Var2, z = Var3, value = Freq)

# what does this look like?
z3_df

# now both selection and filtration all become the same process & very intuitive
z3_df %>% filter(x %in% 1:2, y %in% 3:4, z %in% 2:4) # by 'index'

z3_df %>% filter(value > 9.5) # by value, all index info also preserved

# nesting / unnesting: easy way to step up/down the dimensionality
z3_df %>% nest(-x) # nest everything but the x dimension

z3_df %>% nest(-x, -y) # nest everything but x and y

z3_df %>% nest(-x) %>% unnest(data) # back where we started

Calculations

Python

# python
# are performed element-wise!
z_plus = z2 + z3
print(z_plus)

## [[[ 1.1  1.2  1.3  1.4  1.5  1.6  1.7  1.8  1.9  2. ]
##   [ 2.1  2.2  2.3  2.4  2.5  2.6  2.7  2.8  2.9  3. ]
##   [ 3.1  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9  4. ]
##   [ 4.1  4.2  4.3  4.4  4.5  4.6  4.7  4.8  4.9  5. ]
##   [ 5.1  5.2  5.3  5.4  5.5  5.6  5.7  5.8  5.9  6. ]]
## 
##  [[ 6.1  6.2  6.3  6.4  6.5  6.6  6.7  6.8  6.9  7. ]
##   [ 7.1  7.2  7.3  7.4  7.5  7.6  7.7  7.8  7.9  8. ]
##   [ 8.1  8.2  8.3  8.4  8.5  8.6  8.7  8.8  8.9  9. ]
##   [ 9.1  9.2  9.3  9.4  9.5  9.6  9.7  9.8  9.9 10. ]
##   [10.1 10.2 10.3 10.4 10.5 10.6 10.7 10.8 10.9 11. ]]]

z_times = z2 * z3
print(z_times)

## [[[ 0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9  1. ]
##   [ 1.1  1.2  1.3  1.4  1.5  1.6  1.7  1.8  1.9  2. ]
##   [ 2.1  2.2  2.3  2.4  2.5  2.6  2.7  2.8  2.9  3. ]
##   [ 3.1  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9  4. ]
##   [ 4.1  4.2  4.3  4.4  4.5  4.6  4.7  4.8  4.9  5. ]]
## 
##  [[ 5.1  5.2  5.3  5.4  5.5  5.6  5.7  5.8  5.9  6. ]
##   [ 6.1  6.2  6.3  6.4  6.5  6.6  6.7  6.8  6.9  7. ]
##   [ 7.1  7.2  7.3  7.4  7.5  7.6  7.7  7.8  7.9  8. ]
##   [ 8.1  8.2  8.3  8.4  8.5  8.6  8.7  8.8  8.9  9. ]
##   [ 9.1  9.2  9.3  9.4  9.5  9.6  9.7  9.8  9.9 10. ]]]

# better to be explicit using the .multiply / .dot functions
print(np.multiply(z2, z3)) # element-wise multiplication
# dot product does not work with these dimensions

## [[[ 0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9  1. ]
##   [ 1.1  1.2  1.3  1.4  1.5  1.6  1.7  1.8  1.9  2. ]
##   [ 2.1  2.2  2.3  2.4  2.5  2.6  2.7  2.8  2.9  3. ]
##   [ 3.1  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9  4. ]
##   [ 4.1  4.2  4.3  4.4  4.5  4.6  4.7  4.8  4.9  5. ]]
## 
##  [[ 5.1  5.2  5.3  5.4  5.5  5.6  5.7  5.8  5.9  6. ]
##   [ 6.1  6.2  6.3  6.4  6.5  6.6  6.7  6.8  6.9  7. ]
##   [ 7.1  7.2  7.3  7.4  7.5  7.6  7.7  7.8  7.9  8. ]
##   [ 8.1  8.2  8.3  8.4  8.5  8.6  8.7  8.8  8.9  9. ]
##   [ 9.1  9.2  9.3  9.4  9.5  9.6  9.7  9.8  9.9 10. ]]]

try:
  print(np.dot(z3, z3)) 
except:
  print("Error:", sys.exc_info()[1])
  
# try again with compatible dimensions

## Error: shapes (2,5,10) and (2,5,10) not aligned: 10 (dim 2) != 5 (dim 1)

print(np.dot(z3.reshape(10, 10), z3.reshape(10, 10)))

## [[ 33.55  34.1   34.65  35.2   35.75  36.3   36.85  37.4   37.95  38.5 ]
##  [ 79.55  81.1   82.65  84.2   85.75  87.3   88.85  90.4   91.95  93.5 ]
##  [125.55 128.1  130.65 133.2  135.75 138.3  140.85 143.4  145.95 148.5 ]
##  [171.55 175.1  178.65 182.2  185.75 189.3  192.85 196.4  199.95 203.5 ]
##  [217.55 222.1  226.65 231.2  235.75 240.3  244.85 249.4  253.95 258.5 ]
##  [263.55 269.1  274.65 280.2  285.75 291.3  296.85 302.4  307.95 313.5 ]
##  [309.55 316.1  322.65 329.2  335.75 342.3  348.85 355.4  361.95 368.5 ]
##  [355.55 363.1  370.65 378.2  385.75 393.3  400.85 408.4  415.95 423.5 ]
##  [401.55 410.1  418.65 427.2  435.75 444.3  452.85 461.4  469.95 478.5 ]
##  [447.55 457.1  466.65 476.2  485.75 495.3  504.85 514.4  523.95 533.5 ]]

R

# R
# are also performed element-wise by default
z_plus = z2 + z3
str(z_plus)

##  num [1:2, 1:5, 1:10] 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2 ...

z_times = z2 * z3
str(z_times)

##  num [1:2, 1:5, 1:10] 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 ...

# in data frame format
z2_df %>% 
  # combine with z3 data frame that has the same dimensions
  full_join(z3_df, by = c("x", "y", "z"), suffix = c(".z2", ".z3")) %>% 
  # make any element by element calculations
  mutate(
    z_plus = value.z2 + value.z3,
    z_times = value.z2 * value.z3
  )

# matrix multiplication is a bit trickier, a few examples
(v <- 1:4)

## [1] 1 2 3 4

v %*% v

##      [,1]
## [1,]   30

(w <- diag(v))

##      [,1] [,2] [,3] [,4]
## [1,]    1    0    0    0
## [2,]    0    2    0    0
## [3,]    0    0    3    0
## [4,]    0    0    0    4

w %*% v

##      [,1]
## [1,]    1
## [2,]    4
## [3,]    9
## [4,]   16

(m <- array(1:12, dim = c(4,3)))

##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12

v %*% m

##      [,1] [,2] [,3]
## [1,]   30   70  110

tryCatch(m %*% m, error = function(e) message("Error: ", e$message))

## Error: non-conformable arguments

Logical Operations

Python

# python - element wise logic
print(a < 20)
# logic on multiple values

## [ True False  True]

print(np.logical_or(a < 4.0, a > 20.0))
# find out what the values are of the elements that match this condition

## [ True  True False]

print(a[np.logical_or(a < 4.0, a > 20.0)])
# logic for multidimensional arrays

## [ 3.14 60.  ]

print(z3[np.logical_or(z3 < 0.5, z3 > 9)])

## [ 0.1  0.2  0.3  0.4  9.1  9.2  9.3  9.4  9.5  9.6  9.7  9.8  9.9 10. ]

R

# R - also element wise logic
print(a < 20)

## [1]  TRUE FALSE  TRUE

print(a < 4.0 | a > 20.0) # since everything is a vector, this works directly with vector logic (&, |)

## [1]  TRUE  TRUE FALSE

print(a[a < 4.0 | a > 20.0])

## [1]  3.14 60.00

# logic for multidimensional arrays
print(z3[z3 < 0.5 | z3 > 9])

##  [1]  0.1  0.2  0.3  0.4  9.1  9.2  9.3  9.4  9.5  9.6  9.7  9.8  9.9 10.0

# or the data frame version
z3_df %>% filter(value < 0.5 | value > 9)

Numerical calculation

Example 1: radioactive decay with production term

\[ \frac{dC}{dt} = -kC + P \]

Analytical solution

\[ C(t) = \frac{P}{k}\left(1-e^{-kt}\right) \]

Numerical solution using finite difference.

\[ \frac{dC}{dt} \approx \frac{C^{t+1} - C^t}{\Delta t} = -kC^t + P \\ C^{t+1} = C^t - k \Delta t C^t + P \Delta t \]

Python

# python - define parameters
C = 0.0
dt = 100.0
run_duration = 2.0e6
num_time_steps = int(run_duration / dt)
hl = 730000
k = np.log(2.0)/hl
P = 20.0
C = np.zeros(num_time_steps)

# python - run model
for i in range(1,num_time_steps):
  C[i] = C[i-1] + dt * (-k * C[i-1] + P)

# python - plot model result
import matplotlib.pyplot as plt
t = np.arange(0.0, run_duration / 1000.0, dt / 1000.0)
plt.plot(t, C)
plt.xlabel("Time [ky]")
plt.ylabel("C")
plt.show()

R

# R - define parameters 
# could do this in global variables but more common would use a data frame
params <- data_frame(
  C0 = 0.0,
  dt = 100.0,
  run_duration = 2.0e6,
  num_time_steps = as.integer(run_duration / dt),
  hl = 730000,
  k = log(2.0)/hl,
  P = 20.0
)
params

# R - initialize data outline
data <- data_frame(
  # time axis (in 1000 years)
  time.ka = seq(0, by = params$dt/1000, length.out = params$num_time_steps),
  # data with lots of NAs
  C = c(params$C0, rep(NA, params$num_time_steps - 1))
)
data

# run the numerical model
for (i in 2:params$num_time_steps) {
  data$C[i] <- data$C[i-1] + params$dt * (-params$k * data$C[i-1] + params$P)
}

# R - plot
data %>% 
  ggplot() + 
  aes(x = time.ka, y = C) + 
  geom_line() +
  theme_bw() +
  labs(x = "Time (ky)", y = "C")

Numerical computation

Sebastian Kopf & Greg Tucker

Basics

Python

R

Create arrays

Python

R

Structure information

Python

R

Selectors

Python

R

Filtering

Python

R

R alternative: switch to data frames

Calculations

Python

R

Logical Operations

Python

R

Numerical calculation

Example 1: radioactive decay with production term

Python

R