Functions and control flow are the backbone of R programming. They enable you to write reusable, organized code and control program logic. Whether you’re creating custom functions, using conditional logic, or iterating over data, mastering these constructs is essential for effective R programming.

This comprehensive guide covers everything from basic function creation to advanced control flow patterns with practical examples for every scenario.

Creating Functions

Functions are reusable blocks of code that perform specific tasks. They improve code organization, reduce duplication, and enable modularity.

Function Basics

# Defining a function: name <- function(parameters) { body }
function_name <- function(argument1, argument2) {
  # Add the two arguments and hand the sum back with an explicit return()
  return(argument1 + argument2)
}

# Invoke the function and store what it returns
output <- function_name(5, 3)
print(output)
# [1] 8

# Implicit return: without return(), a function yields the value of the
# last expression it evaluates
add_numbers <- function(x, y) x + y
print(add_numbers(10, 20))
# [1] 30

Function Arguments

# Positional arguments: values are matched to parameters by their order
greet <- function(first_name, last_name) {
  # paste() joins its arguments with a single space by default
  full_name <- paste(first_name, last_name)
  full_name
}
print(greet("John", "Doe"))
# [1] "John Doe"

# Default arguments: `passing` falls back to 60 when the caller omits it
calculate_grade <- function(score, passing = 60) {
  # Guard clause for the passing case; anything below the threshold fails
  if (score >= passing) {
    return("Pass")
  }
  "Fail"
}
print(calculate_grade(75))      # [1] "Pass"
print(calculate_grade(50))      # [1] "Fail"
print(calculate_grade(55, 50))  # [1] "Pass"

# Named arguments: parameters are matched by name, so the order in which
# they are supplied does not matter
print(
  greet(
    last_name = "Smith",
    first_name = "Jane"
  )
)
# [1] "Jane Smith"

# Variable length arguments: `...` collects any number of arguments
print_all <- function(...) {
  values <- list(...)
  position <- 0L
  # Walk the captured arguments in order, tracking a 1-based position
  for (value in values) {
    position <- position + 1L
    print(paste("Argument", position, ":", value))
  }
}
print_all("a", "b", "c", "d")

Return Values

# Explicit return: return() hands a named list of statistics to the caller
calculate_stats <- function(x) {
  summary_stats <- list(
    mean = mean(x),
    sd = sd(x),
    min = min(x),
    max = max(x)
  )
  return(summary_stats)
}
stats <- calculate_stats(c(1, 2, 3, 4, 5))
print(stats)

# Implicit return: the data frame built last is the function's value
get_summary <- function(x) {
  n_values <- length(x)
  sum_values <- sum(x)
  # One-row data frame holding the count, total, and average of x
  data.frame(
    count = n_values,
    total = sum_values,
    average = mean(x)
  )
}
print(get_summary(1:10))

# Returning several results at once by bundling them in a named list
analyze_data <- function(x) {
  overview <- summary(x)
  moments <- c(mean = mean(x), sd = sd(x))
  list(summary = overview, stats = moments)
}

Function Scope

# Global vs local variables
global_var <- 100

test_scope <- function() {
  # A plain `<-` inside a function binds in the function's own environment,
  # so this creates a new local `global_var` that shadows the global one
  global_var <- 200
  local_var <- 50      # Exists only while the function runs
  print(local_var)     # [1] 50
  print(global_var)    # [1] 200
}

test_scope()
print(global_var)      # [1] 100 (the global is untouched)

# Accessing global variables
# Reset the global so the effect of the function below is visible
global_var <- 100

# Overwrite `global_var` from inside a function using the `<<-` operator.
# Takes no arguments.
modify_global <- function() {
  global_var <<- 200   # <<- assigns in an enclosing environment (here the global one) instead of creating a local
}

modify_global()
print(global_var)      # [1] 200 (changed!)

Error Handling in Functions

# try() - evaluates an expression and captures any error instead of stopping
#
# Divide `a` by `b`, returning NA when the division itself fails.
#   a, b: operands; numeric values (scalars, vectors, or matrices) are expected.
#   Returns a / b, or NA if evaluating a / b raises an error (for example
#   when an operand is not numeric). Division by zero is not an error in R
#   and yields Inf, -Inf, or NaN.
safe_divide <- function(a, b) {
  result <- try(a / b, silent = TRUE)
  # inherits() is the robust class test: class() can return more than one
  # element (a matrix has class c("matrix", "array")), and if() signals an
  # error on a condition that is not of length one
  if (inherits(result, "try-error")) {
    return(NA)
  }
  result
}
print(safe_divide(10, 2))  # [1] 5
print(safe_divide(10, 0))  # [1] Inf
print(safe_divide(10, "a"))  # [1] NA

# tryCatch() - advanced error handling
#
# Read a CSV file, reporting problems instead of stopping the script.
#   filename: path of the CSV file to read.
#   Returns the data frame produced by read.csv(), or NULL when reading
#   signals an error or a warning (the condition message is printed).
read_safe <- function(filename) {
  tryCatch(
    read.csv(filename),
    error = function(e) {
      print(paste("Error reading file:", conditionMessage(e)))
      NULL
    },
    # Opening a missing file signals a warning before the error, so this
    # handler is the one that runs in that case; it must return NULL too,
    # otherwise the caller receives the printed message string
    warning = function(w) {
      print(paste("Warning:", conditionMessage(w)))
      NULL
    }
  )
}

# stopifnot() - validate inputs; it raises an error at the first check
# that is not TRUE
validate_age <- function(age) {
  stopifnot(is.numeric(age))  # must be a number
  stopifnot(age >= 0)         # no negative ages
  stopifnot(age <= 150)       # upper sanity bound
  print("Age is valid")
}
validate_age(25)      # [1] "Age is valid"
# validate_age("abc") # Error

Conditional Statements

Conditional statements control the flow of execution based on conditions.

if Statement

# Simple if: the body runs only when the condition is TRUE
score <- 85
if (score >= 80) {
  print("Excellent!")
}

# if with multiple conditions
# Use && (scalar, short-circuiting) inside if(); the elementwise & is meant
# for building logical vectors, not for single TRUE/FALSE conditions
age <- 25
income <- 50000
if (age >= 21 && income >= 40000) {
  print("Eligible for loan")
}

# if / else if / else chain: branches are tested top to bottom and the
# first one whose condition is TRUE wins
if (age < 13) {
  category <- "Child"
} else if (age < 18) {
  category <- "Teen"
} else if (age < 65) {
  category <- "Adult"
} else {
  category <- "Senior"
}
print(category)

ifelse() Function

# ifelse() is vectorized: it tests every element and picks the matching
# value from its yes/no arguments
scores <- c(85, 92, 78, 95, 88)
grades <- ifelse(
  scores >= 90, "A",
  ifelse(
    scores >= 80, "B",
    ifelse(scores >= 70, "C", "F")
  )
)
print(grades)
# [1] "B" "A" "C" "A" "B"

# Nesting ifelse() calls handles more than two outcomes; each inner call
# only decides the elements the outer test rejected
age <- c(5, 15, 25, 65, 75)
category <- ifelse(
  age < 13, "Child",
  ifelse(
    age < 18, "Teen",
    ifelse(age < 65, "Adult", "Senior")
  )
)
print(category)

switch() Statement

# switch() on a string selects the branch whose name matches
get_color <- function(fruit) {
  switch(
    fruit,
    apple = "red",
    banana = "yellow",
    grape = "purple",
    # The unnamed final argument is the fallback when no name matches
    "unknown"
  )
}
print(get_color("apple"))     # [1] "red"
print(get_color("orange"))    # [1] "unknown"

# switch() on a number selects the alternative at that position
day_num <- 3
day_name <- switch(
  day_num,
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
print(day_name)  # [1] "Wednesday"

Loops

Loops repeat a block of code multiple times.

for Loop

# Basic for loop: iterate over the integers 1 to 5
for (i in seq_len(5)) {
  print(i)
}

# Loop over vector: the loop variable takes each element in turn
fruits <- c("apple", "banana", "cherry")
for (fruit in fruits) {
  print(fruit)
}

# Loop with index: seq_along() yields the positions 1..length(fruits)
for (i in seq_along(fruits)) {
  print(paste(i, ":", fruits[[i]]))
}

# Nested loops: the inner loop runs in full for every outer iteration
for (i in seq_len(3)) {
  for (j in seq_len(3)) {
    print(paste(i, "x", j, "=", i * j))
  }
}

# Loop with break: leave the loop entirely once i reaches 5
for (i in seq_len(10)) {
  if (i == 5) {
    break
  }
  print(i)
}

# Loop with next: skip the rest of the body for i == 3 and carry on
for (i in seq_len(5)) {
  if (i == 3) {
    next
  }
  print(i)
}

while Loop

# Basic while loop: repeat while the condition holds, advancing the
# counter by hand
i <- 1
while (i <= 5) {
  print(i)
  i <- i + 1
}

# while with condition: runs exactly three times
count <- 0
while (count < 3) {
  print("Counting...")
  count <- count + 1
}

# while with break: an always-TRUE condition, exited from inside the body
i <- 1
while (TRUE) {
  if (i > 5) {
    break
  }
  print(i)
  i <- i + 1
}

repeat Loop

# repeat has no condition of its own: the body must break out explicitly
i <- 1
repeat {
  print(i)
  i <- i + 1
  if (i > 5) {
    break
  }
}

# User input simulation: stop after the third attempt
attempts <- 0
repeat {
  attempts <- attempts + 1
  print(paste("Attempt:", attempts))
  if (attempts >= 3) {
    break
  }
}

Apply Family Functions

The apply family of functions applies a function to every element, row, column, or group of a data structure, replacing explicit loops with a single, concise call.

apply()

# apply() walks the rows or columns of a matrix
# A 3 x 4 matrix filled column by column with 1..12
data <- matrix(1:12, nrow = 3)

# MARGIN = 1 applies the function to each row
row_sums <- apply(data, MARGIN = 1, FUN = sum)
print(row_sums)
# [1] 22 26 30

# MARGIN = 2 applies the function to each column
col_means <- apply(data, MARGIN = 2, FUN = mean)
print(col_means)
# [1]  2  5  8 11

lapply() and sapply()

# lapply() calls the function on every element and always returns a list
numbers <- list(a = 1:3, b = 4:6, c = 7:9)
sums_list <- lapply(X = numbers, FUN = sum)
print(sums_list)
# $a
# [1] 6
# $b
# [1] 15
# $c
# [1] 24

# sapply() does the same work but simplifies the result to a named vector
sums_vec <- sapply(X = numbers, FUN = sum)
print(sums_vec)
#  a  b  c
#  6 15 24

# Any function can be passed, including your own
double_it <- function(x) {
  x * 2
}
doubled <- sapply(X = 1:5, FUN = double_it)
print(doubled)
# [1]  2  4  6  8 10

mapply()

# mapply() steps through several vectors in parallel, passing the i-th
# element of each one to the function
x <- c(1, 2, 3)
y <- c(10, 20, 30)
z <- c(100, 200, 300)

add_three <- function(a, b, c) {
  a + b + c
}
results <- mapply(FUN = add_three, x, y, z)
print(results)
# [1] 111 222 333

# SIMPLIFY = FALSE keeps the results as a list instead of a vector
results_list <- mapply(FUN = add_three, x, y, z, SIMPLIFY = FALSE)

tapply()

# tapply() splits the values by group label and summarizes each group
groups <- c("A", "B", "A", "B", "A")
values <- c(10, 20, 15, 25, 30)

group_sums <- tapply(X = values, INDEX = groups, FUN = sum)
print(group_sums)
#  A  B
# 55 45

Complete Practical Examples

Building a Statistics Function

# Compute a set of descriptive statistics for a numeric input.
#   x: numeric input; anything else raises an error.
#   na.rm: when TRUE (the default), missing values are dropped first.
#   Returns a named list: mean, median, sd, min, max, and n (the number of
#   values the statistics were computed on).
calculate_all_stats <- function(x, na.rm = TRUE) {
  # Reject non-numeric input up front
  if (!is.numeric(x)) stop("Input must be numeric")

  # Work on the NA-free values when requested, otherwise on x as given
  values <- if (na.rm) na.omit(x) else x

  list(
    mean = mean(values),
    median = median(values),
    sd = sd(values),
    min = min(values),
    max = max(values),
    n = length(values)
  )
}

stats <- calculate_all_stats(c(1, 2, 3, 4, 5, NA))
print(stats)

Processing Multiple Files

# Summarize every file in the working directory whose name matches a pattern.
#   file_pattern: regular expression passed to list.files().
#   Returns a list with one entry per matching file: on success a list
#   with file, rows, cols, and status = "success"; on a read error a list
#   with file, status = "error", and the error message.
process_files <- function(file_pattern) {
  # Read a single file and describe the outcome; errors are captured so
  # one bad file does not abort the whole batch
  summarise_file <- function(file) {
    tryCatch(
      {
        contents <- read.csv(file)
        list(
          file = file,
          rows = nrow(contents),
          cols = ncol(contents),
          status = "success"
        )
      },
      error = function(e) {
        list(file = file, status = "error", message = e$message)
      }
    )
  }

  matching <- list.files(pattern = file_pattern)
  lapply(matching, summarise_file)
}

Best Practices

  1. Keep functions focused - One function, one job
  2. Use meaningful names - Function names should describe what they do
  3. Document with comments - Explain complex logic
  4. Validate inputs - Use stopifnot() or explicit checks
  5. Use vectorized operations - Faster than loops when possible
  6. Handle errors gracefully - Use tryCatch() for robustness
  7. Return consistent types - Don’t sometimes return NULL, sometimes data
  8. Keep scope clean - Avoid global variables when possible

Common Questions

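Q: When should I use apply vs loop? A: Use the apply family when you are calling the same function on each element and collecting the results; the code is shorter and makes the intent clear, though it is not necessarily faster than a well-written loop. Use loops when the logic is complex, when an iteration depends on the previous one, or when you need fine control (for example break or next).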

Q: How do I exit a loop early? A: Use break to exit completely, or next to skip to next iteration.

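Q: Should functions always return values? A: Every R function returns a value: either whatever is passed to return() or the value of the last evaluated expression. A function called only for its side effects (such as printing) can return NULL or use invisible(). Be consistent about what each function returns.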

Q: How do I pass functions as arguments? A: Functions are first-class objects. Pass function name without parentheses: lapply(data, my_function)

Q: What’s the difference between sapply and lapply? A: sapply simplifies output to vector/matrix if possible, lapply always returns list.

Build on functions and control flow:

Download R Script

Get all code examples from this tutorial: functions-control-flow-examples.R