The numpy
library provides the basis for numerical multidimensional arrays. xarray
provides additional features that allows for numpy arrays with pandas-like named dimensions and therefore very efficient representation of netCDF data and similar geospatial information.
All basic data in R is always already a vector (i.e. a 1-dimensional array). Two dimensional number arrays in R are called matrix
but the array
data type provides more general multi-dimensional data options similar to numpy
in python. However, the more common approach in R is to use data_frame
tabular data that can be nested (tidyr’s nest
) and unnested (tidyr’s unnest
) to provide higher dimmensional data that’s still compatible with dplyr
and ggplot
operations.
# python
import numpy as np # import numpy
# basic numpy array (by default initialized as float)
a = np.array([3.14, 60, 17])
print(a)
## [ 3.14 60. 17. ]
print(type(a[0]))
# specify integer arrays if desired
## <class 'numpy.float64'>
b = np.array([1, 2, 3], dtype=int)
print(b)
## [1 2 3]
print(type(b[0]))
## <class 'numpy.int64'>
# basic vector
a <- c(3.14, 60, 17)
str(a)
## num [1:3] 3.14 60 17
# integer vector
b <- c(1L, 2L, 3L)
str(b)
## int [1:3] 1 2 3
# special case for easy creation of sequential integer vectors
b2 <- 1:5
str(b2)
## int [1:5] 1 2 3 4 5
# python
# generate a basic array of zeros
z = np.zeros((2, 5, 10)) # 2 x 5 x 10 array
print(z)
# generate an array of ones with same dimension
## [[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
## [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
## [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
## [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
## [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
##
## [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
## [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
## [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
## [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
## [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]]
z2 = np.ones_like(z)
print(z2)
# range of 100 values from 0.1 to 10 reshaped into a 10x10 array
## [[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
## [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
## [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
## [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
## [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
##
## [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
## [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
## [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
## [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
## [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]]
z3 = np.linspace(0.1,10,100).reshape(2, 5, 10)
print(z3)
## [[[ 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]
## [ 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2. ]
## [ 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3. ]
## [ 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4. ]
## [ 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5. ]]
##
## [[ 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9 6. ]
## [ 6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9 7. ]
## [ 7.1 7.2 7.3 7.4 7.5 7.6 7.7 7.8 7.9 8. ]
## [ 8.1 8.2 8.3 8.4 8.5 8.6 8.7 8.8 8.9 9. ]
## [ 9.1 9.2 9.3 9.4 9.5 9.6 9.7 9.8 9.9 10. ]]]
# R
# notice that although the dimensions are the same size, the way the arrays are populated is quite different!
str(py$z3) # array coming from python
## num [1:2, 1:5, 1:10] 0.1 5.1 1.1 6.1 2.1 7.1 3.1 8.1 4.1 9.1 ...
z3 <- array(seq(0.1, 100, by = 0.1), dim = c(2, 5, 10))
str(z3) # "same" array created in R
## num [1:2, 1:5, 1:10] 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 ...
# default array values - either e.g. a sequence as illustrated above or single default
str(array(NA_real_, dim = c(2, 5, 10))) # not available as default
## num [1:2, 1:5, 1:10] NA NA NA NA NA NA NA NA NA NA ...
str(z <- array(0L, dim = c(2, 5, 10))) # 0 integer as default
## int [1:2, 1:5, 1:10] 0 0 0 0 0 0 0 0 0 0 ...
str(z2 <- array(1, dim = c(2, 5, 10))) # 1.0 numeric (=float) as default
## num [1:2, 1:5, 1:10] 1 1 1 1 1 1 1 1 1 1 ...
# note that arrays in R can also have named dimensions
zdim <- array(1:100, dim = c(2, 5, 10), dimnames = list(x = str_c("x", 1:2), y = str_c("y", 1:5), z = str_c("z", 1:10)))
str(zdim)
## int [1:2, 1:5, 1:10] 1 2 3 4 5 6 7 8 9 10 ...
## - attr(*, "dimnames")=List of 3
## ..$ x: chr [1:2] "x1" "x2"
## ..$ y: chr [1:5] "y1" "y2" "y3" "y4" ...
## ..$ z: chr [1:10] "z1" "z2" "z3" "z4" ...
print(zdim)
## , , z = z1
##
## y
## x y1 y2 y3 y4 y5
## x1 1 3 5 7 9
## x2 2 4 6 8 10
##
## , , z = z2
##
## y
## x y1 y2 y3 y4 y5
## x1 11 13 15 17 19
## x2 12 14 16 18 20
##
## , , z = z3
##
## y
## x y1 y2 y3 y4 y5
## x1 21 23 25 27 29
## x2 22 24 26 28 30
##
## , , z = z4
##
## y
## x y1 y2 y3 y4 y5
## x1 31 33 35 37 39
## x2 32 34 36 38 40
##
## , , z = z5
##
## y
## x y1 y2 y3 y4 y5
## x1 41 43 45 47 49
## x2 42 44 46 48 50
##
## , , z = z6
##
## y
## x y1 y2 y3 y4 y5
## x1 51 53 55 57 59
## x2 52 54 56 58 60
##
## , , z = z7
##
## y
## x y1 y2 y3 y4 y5
## x1 61 63 65 67 69
## x2 62 64 66 68 70
##
## , , z = z8
##
## y
## x y1 y2 y3 y4 y5
## x1 71 73 75 77 79
## x2 72 74 76 78 80
##
## , , z = z9
##
## y
## x y1 y2 y3 y4 y5
## x1 81 83 85 87 89
## x2 82 84 86 88 90
##
## , , z = z10
##
## y
## x y1 y2 y3 y4 y5
## x1 91 93 95 97 99
## x2 92 94 96 98 100
# python
# get shape and size information
print(np.shape(a)) # dimensions
## (3,)
print(np.size(a)) # total # elements
## 3
print(np.shape(z)) # dimensions
## (2, 5, 10)
print(np.size(z)) # total # elements
# reshaping into different dimensions
## 100
print(z3.reshape(20,5))
## [[ 0.1 0.2 0.3 0.4 0.5]
## [ 0.6 0.7 0.8 0.9 1. ]
## [ 1.1 1.2 1.3 1.4 1.5]
## [ 1.6 1.7 1.8 1.9 2. ]
## [ 2.1 2.2 2.3 2.4 2.5]
## [ 2.6 2.7 2.8 2.9 3. ]
## [ 3.1 3.2 3.3 3.4 3.5]
## [ 3.6 3.7 3.8 3.9 4. ]
## [ 4.1 4.2 4.3 4.4 4.5]
## [ 4.6 4.7 4.8 4.9 5. ]
## [ 5.1 5.2 5.3 5.4 5.5]
## [ 5.6 5.7 5.8 5.9 6. ]
## [ 6.1 6.2 6.3 6.4 6.5]
## [ 6.6 6.7 6.8 6.9 7. ]
## [ 7.1 7.2 7.3 7.4 7.5]
## [ 7.6 7.7 7.8 7.9 8. ]
## [ 8.1 8.2 8.3 8.4 8.5]
## [ 8.6 8.7 8.8 8.9 9. ]
## [ 9.1 9.2 9.3 9.4 9.5]
## [ 9.6 9.7 9.8 9.9 10. ]]
# R
# get shape and size information
print(dim(a)) # dimension is NOT defined for a vector
## NULL
print(length(a)) # total # elements
## [1] 3
print(dim(z)) # dimensions of array
## [1] 2 5 10
print(length(z)) # total # elements
## [1] 100
# reshaping into different dimensions
z3_copy <- z3
str(z3_copy)
## num [1:2, 1:5, 1:10] 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 ...
dim(z3_copy) <- c(20, 5)
str(z3_copy)
## num [1:20, 1:5] 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 ...
# python
# select subsets of the array by 0-based index
print(a)
## [ 3.14 60. 17. ]
print(a[0]) # index from beginning
## 3.14
print(a[0:2]) # range (EXCLUSIVE end index)
## [ 3.14 60. ]
print(a[-2]) # index from end
## 60.0
print(a[-2:]) # range till end
# same for multidimensional arrays, each dimension as separate index
## [60. 17.]
print(z3[0:2,3:,1:4])
## [[[3.2 3.3 3.4]
## [4.2 4.3 4.4]]
##
## [[8.2 8.3 8.4]
## [9.2 9.3 9.4]]]
# R
# quite different from python! indexing is 1-based and the end index is INCLUSIVE, also the -x syntax means exclude, not count from the end
print(a)
## [1] 3.14 60.00 17.00
print(a[1]) # index from beginning (1-base)
## [1] 3.14
print(a[1:2]) # range (INCLUSIVE end index)
## [1] 3.14 60.00
print(tail(a, 2)) # get as indexed from back
## [1] 60 17
print(a[-2]) # EXCLUDE this index
## [1] 3.14 17.00
print(a[-c(2:3)]) # EXCLUDE range
## [1] 3.14
# same for multidimensional arrays, each dimension as separate index but 1-based
str(z3[1:2,3:4,2:4])
## num [1:2, 1:2, 1:3] 1.5 1.6 1.7 1.8 2.5 2.6 2.7 2.8 3.5 3.6 ...
# python
# use where for filtering
[i] = np.where(a < 20) # get indices (0-based as always)
print(i)
## [0 2]
print(a[i])
## [ 3.14 17. ]
print(a[np.where(a < 20)]) # directly
# same for multidimensional arrays except multiple indices are returned
## [ 3.14 17. ]
[i,j,k] = np.where(z3 > 9.5) # store indices
print(z3[i,j,k])
## [ 9.6 9.7 9.8 9.9 10. ]
print(z3[np.where(z3 > 9.5)]) # directly
## [ 9.6 9.7 9.8 9.9 10. ]
# R
i <- which(a < 20) # get indices (1-based as always)
print(i)
## [1] 1 3
print(a[i])
## [1] 3.14 17.00
print(a[a < 20]) # directly
## [1] 3.14 17.00
# same for multidimensional arrays
is <- which(z3 > 9.5) # different to python! single index vector returned (array is still just a vector underneath)
print(z3[is]) # consequently selected values no longer have their dimensionality!
## [1] 9.6 9.7 9.8 9.9 10.0
print(z3[z3 > 9.5]) # directly
## [1] 9.6 9.7 9.8 9.9 10.0
# R: alternative
# this is where conversion to data frame becomes more and more useful
array_to_df <- function(array) {
array %>%
# convert array to table (+ to dplyr data frame)
as.data.frame.table() %>% tbl_df() %>%
# switch to index instead of named dimensions
mutate_if(is.factor, as.integer) %>%
return()
}
# convert array to data frame (note: column renaming is optional)
z3_df <- z3 %>% array_to_df() %>% rename(x = Var1, y = Var2, z = Var3, value = Freq)
z2_df <- z2 %>% array_to_df() %>% rename(x = Var1, y = Var2, z = Var3, value = Freq)
# what does this look like?
z3_df
# now both selection and filtration all become the same process & very intuitive
z3_df %>% filter(x %in% 1:2, y %in% 3:4, z %in% 2:4) # by 'index'
z3_df %>% filter(value > 9.5) # by value, all index info also preserved
# nesting / unnesting: easy way to step up/down the dimensionality
z3_df %>% nest(-x) # nest everything but the x dimension
z3_df %>% nest(-x, -y) # nest everything but x and y
z3_df %>% nest(-x) %>% unnest(data) # back where we started
# python
# are performed element-wise!
z_plus = z2 + z3
print(z_plus)
## [[[ 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2. ]
## [ 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3. ]
## [ 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4. ]
## [ 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5. ]
## [ 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9 6. ]]
##
## [[ 6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9 7. ]
## [ 7.1 7.2 7.3 7.4 7.5 7.6 7.7 7.8 7.9 8. ]
## [ 8.1 8.2 8.3 8.4 8.5 8.6 8.7 8.8 8.9 9. ]
## [ 9.1 9.2 9.3 9.4 9.5 9.6 9.7 9.8 9.9 10. ]
## [10.1 10.2 10.3 10.4 10.5 10.6 10.7 10.8 10.9 11. ]]]
z_times = z2 * z3
print(z_times)
## [[[ 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]
## [ 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2. ]
## [ 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3. ]
## [ 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4. ]
## [ 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5. ]]
##
## [[ 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9 6. ]
## [ 6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9 7. ]
## [ 7.1 7.2 7.3 7.4 7.5 7.6 7.7 7.8 7.9 8. ]
## [ 8.1 8.2 8.3 8.4 8.5 8.6 8.7 8.8 8.9 9. ]
## [ 9.1 9.2 9.3 9.4 9.5 9.6 9.7 9.8 9.9 10. ]]]
# better to be explicit using the .multiply / .dot functions
print(np.multiply(z2, z3)) # element-wise multiplication
# dot product does not work with these dimensions
## [[[ 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]
## [ 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2. ]
## [ 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3. ]
## [ 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4. ]
## [ 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5. ]]
##
## [[ 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9 6. ]
## [ 6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9 7. ]
## [ 7.1 7.2 7.3 7.4 7.5 7.6 7.7 7.8 7.9 8. ]
## [ 8.1 8.2 8.3 8.4 8.5 8.6 8.7 8.8 8.9 9. ]
## [ 9.1 9.2 9.3 9.4 9.5 9.6 9.7 9.8 9.9 10. ]]]
try:
print(np.dot(z3, z3))
except:
print("Error:", sys.exc_info()[1])
# try again with compatible dimensions
## Error: shapes (2,5,10) and (2,5,10) not aligned: 10 (dim 2) != 5 (dim 1)
print(np.dot(z3.reshape(10, 10), z3.reshape(10, 10)))
## [[ 33.55 34.1 34.65 35.2 35.75 36.3 36.85 37.4 37.95 38.5 ]
## [ 79.55 81.1 82.65 84.2 85.75 87.3 88.85 90.4 91.95 93.5 ]
## [125.55 128.1 130.65 133.2 135.75 138.3 140.85 143.4 145.95 148.5 ]
## [171.55 175.1 178.65 182.2 185.75 189.3 192.85 196.4 199.95 203.5 ]
## [217.55 222.1 226.65 231.2 235.75 240.3 244.85 249.4 253.95 258.5 ]
## [263.55 269.1 274.65 280.2 285.75 291.3 296.85 302.4 307.95 313.5 ]
## [309.55 316.1 322.65 329.2 335.75 342.3 348.85 355.4 361.95 368.5 ]
## [355.55 363.1 370.65 378.2 385.75 393.3 400.85 408.4 415.95 423.5 ]
## [401.55 410.1 418.65 427.2 435.75 444.3 452.85 461.4 469.95 478.5 ]
## [447.55 457.1 466.65 476.2 485.75 495.3 504.85 514.4 523.95 533.5 ]]
# R
# are also performed element-wise by default
z_plus = z2 + z3
str(z_plus)
## num [1:2, 1:5, 1:10] 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2 ...
z_times = z2 * z3
str(z_times)
## num [1:2, 1:5, 1:10] 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 ...
# in data frame format
z2_df %>%
# combine with z3 data frame that has the same dimensions
full_join(z3_df, by = c("x", "y", "z"), suffix = c(".z2", ".z3")) %>%
# make any element by element calculations
mutate(
z_plus = value.z2 + value.z3,
z_times = value.z2 * value.z3
)
# matrix multiplication is a bit trickier, a few examples
(v <- 1:4)
## [1] 1 2 3 4
v %*% v
## [,1]
## [1,] 30
(w <- diag(v))
## [,1] [,2] [,3] [,4]
## [1,] 1 0 0 0
## [2,] 0 2 0 0
## [3,] 0 0 3 0
## [4,] 0 0 0 4
w %*% v
## [,1]
## [1,] 1
## [2,] 4
## [3,] 9
## [4,] 16
(m <- array(1:12, dim = c(4,3)))
## [,1] [,2] [,3]
## [1,] 1 5 9
## [2,] 2 6 10
## [3,] 3 7 11
## [4,] 4 8 12
v %*% m
## [,1] [,2] [,3]
## [1,] 30 70 110
tryCatch(m %*% m, error = function(e) message("Error: ", e$message))
## Error: non-conformable arguments
# python - element wise logic
print(a < 20)
# logic on multiple values
## [ True False True]
print(np.logical_or(a < 4.0, a > 20.0))
# find out what the values are of the elements that match this condition
## [ True True False]
print(a[np.logical_or(a < 4.0, a > 20.0)])
# logic for multidimensional arrays
## [ 3.14 60. ]
print(z3[np.logical_or(z3 < 0.5, z3 > 9)])
## [ 0.1 0.2 0.3 0.4 9.1 9.2 9.3 9.4 9.5 9.6 9.7 9.8 9.9 10. ]
# R - also element wise logic
print(a < 20)
## [1] TRUE FALSE TRUE
print(a < 4.0 | a > 20.0) # since everything is a vector, this works directly with vector logic (&, |)
## [1] TRUE TRUE FALSE
print(a[a < 4.0 | a > 20.0])
## [1] 3.14 60.00
# logic for multidimensional arrays
print(z3[z3 < 0.5 | z3 > 9])
## [1] 0.1 0.2 0.3 0.4 9.1 9.2 9.3 9.4 9.5 9.6 9.7 9.8 9.9 10.0
# or the data frame version
z3_df %>% filter(value < 0.5 | value > 9)
\[ \frac{dC}{dt} = -kC + P \]
Analytical solution
\[ C(t) = \frac{P}{k}\left(1-e^{-kt}\right) \]
Numerical solution using finite difference.
\[ \frac{dC}{dt} \approx \frac{C^{t+1} - C^t}{\Delta t} = -kC^t + P \\ C^{t+1} = C^t - k \Delta t C^t + P \Delta t \]
# python - define parameters
C = 0.0
dt = 100.0
run_duration = 2.0e6
num_time_steps = int(run_duration / dt)
hl = 730000
k = np.log(2.0)/hl
P = 20.0
C = np.zeros(num_time_steps)
# python - run model
for i in range(1,num_time_steps):
C[i] = C[i-1] + dt * (-k * C[i-1] + P)
# python - plot model result
import matplotlib.pyplot as plt
t = np.arange(0.0, run_duration / 1000.0, dt / 1000.0)
plt.plot(t, C)
plt.xlabel("Time [ky]")
plt.ylabel("C")
plt.show()
# R - define parameters
# could do this in global variables but more common would use a data frame
params <- data_frame(
C0 = 0.0,
dt = 100.0,
run_duration = 2.0e6,
num_time_steps = as.integer(run_duration / dt),
hl = 730000,
k = log(2.0)/hl,
P = 20.0
)
params
# R - initialize data outline
data <- data_frame(
# time axis (in 1000 years)
time.ka = seq(0, by = params$dt/1000, length.out = params$num_time_steps),
# data with lots of NAs
C = c(params$C0, rep(NA, params$num_time_steps - 1))
)
data
# run the numerical model
for (i in 2:params$num_time_steps) {
data$C[i] <- data$C[i-1] + params$dt * (-params$k * data$C[i-1] + params$P)
}
# R - plot
data %>%
ggplot() +
aes(x = time.ka, y = C) +
geom_line() +
theme_bw() +
labs(x = "Time (ky)", y = "C")