This post is part of my preparation series for the Cloudera CCA175 exam, “Certified Spark and Hadoop Developer”. It is intentionally concise, to serve me as a cheat sheet.

You need familiarity with Python as well as Scala for the Cloudera exam. I have never coded in Scala before, so I am taking this course and preparing a brief Scala summary in this post.

Fortunately, the spark-shell is very helpful in the exam, and you don’t need to compile any .jar files.

Scala Basics

// Values (val) are immutable: once defined they cannot be reassigned.
val hello: String = "Hello!"
println(hello)

// Variables (var) are mutable and may be reassigned:
var good_morrow: String = hello
good_morrow = hello + " Good morning!"

// Common basic types:
val myInt: Int = 1
val myBool: Boolean = true
val myLetter: Char = 'a'
val piFloat: Float = 3.14159265f  // the 'f' suffix marks a Float literal
val pi: Double = 3.1415926535
val myByte: Byte = 127  // 127 is the largest value a Byte can hold
val largeInt: Long = 123451234512L  // the 'L' suffix is required: the number does not fit into an Int literal

// String interpolation is a feature of string literals (not of println):
// f"..." accepts printf-style formats, s"..." inserts values and ${...} expressions.
println(f"Pi is approx. $pi%.3f")
println(s"$hello Good day! $good_morrow")
println(s"Three plus four equals ${3+4}")

// A block is an expression; its value is the value of its last expression:
println({val x=10; x + 20})  // this will print out 30.

Strings

val s = List(
  "I see a little silhouetto of a man",
  "scaramouche, scaramouche, will you do the fandango"
)

// One Boolean per line: does the line start with "scar"?
s.map(line => line.startsWith("scar"))
// Keep only the lines that contain "little"
s.filter(line => line.contains("little"))
// Split every line at spaces and flatten the pieces into a single list of words
s.flatMap(line => line.split(" "))
// Same as above, then keep only the words shorter than four characters
s.flatMap(line => line.split(" ")).filter(word => word.length < 4)

Regular expressions

val myString: String = "And I would walk 500 miles"

// Create a regex by appending '.r' to a string; triple quotes avoid having to escape backslashes.
// The lazy '.*?' skips as little as possible, so the group captures the FIRST run of digits
// (a greedy '.*' in front would capture the last one instead).
val pattern = """.*?(\d+).*""".r
// Using the regex as an extractor binds the capture group to resultString.
// The whole string must match; a string without any digits raises a MatchError.
val pattern(resultString) = myString

val result = resultString.toInt  // .toFloat etc. all exist too, of course

Boolean logic

val certainly = 1 < 2
val unlikely = 4 < 0

val nope = certainly & unlikely  // '&' always evaluates both operands
val nope2 = certainly && unlikely  // same result; '&&' short-circuits, i.e. it skips the right operand whenever the left one is already false

val password = "dootdoot"
val entry = "123456"
// 'match' is a reserved word in Scala and cannot be used as a plain identifier, hence 'isMatch'.
val isMatch: Boolean = entry == password  // '==' compares the string contents

Flow control

// If/else

// One-liner: if/else is an expression, so its result can be handed straight to println
println(if (1 < 3) "Yes!" else "No :(")

// Block form with braces around each branch
if (1 < 3) {
  println("Yes!")
} else {
  println("No :(")
}
 
// Matching (switch/case)

val n = 2
// 'match' is an expression too: select the text first, then print it once.
// The '_' case is the catch-all default.
println(n match {
  case 1 => "One"
  case 2 => "Two"
  case 3 => "Three"
  case _ => "Default output"
})
 
// For loops

// '1 to 4' builds an inclusive range; '<-' is the generator that feeds each element to the loop variable
for (i <- 1 to 4) println(i * i)
 
// While loops

var x = 100
// Counts down from 100 to 0 (inclusive)
while (x >= 0) {
  println(s"$x bottles of beer on the wall.")
  x = x - 1
}

// do/while tests its condition after the body, so the body always runs at least once.
// x is -1 at this point, so this prints -1 up to 10.
do {
  println(x)
  x = x + 1
} while (x <= 10)

Functions

// Definition:

/** Returns true if `x` is divisible by two. */
def isEven(x: Int): Boolean = x % 2 == 0

println(isEven(15))
 
// A function that only performs a side effect has return type Unit (Scala's counterpart to void):
def print_something(text: String): Unit = println(s"### $text ###")
 
// Functions as parameters:

/** Returns the square of `x`. */
def squareIt(x: Int): Int = {
    x*x
}

/** Applies the function `fct` to `x` and returns the result. */
def transformInt(x: Int, fct: Int => Int): Int = {
    fct(x)
}

// The method squareIt is passed as a function value; result is 4.
val result = transformInt(2, squareIt)
 
// Lambda functions (anonymous functions)

transformInt(15, (x: Int) => x*x*x)  // full notation
transformInt(15, x => x*x*x)  // type inferred
transformInt(15, scala.math.pow(_, 3).toInt)  // shortcut via placeholder; pow returns a Double, hence the .toInt

transformInt(12, x => {val y=x+3; y*y})  // this implements (x+3)^2

Data structures

// Tuples: fixed-size, immutable groups of values; the elements may have different types.

val myTuple: (String, String, String, Double) = ("Good", "morning", "Sir", 3.14159)

// Elements are accessed as ._1, ._2, ... (one-based, unlike lists):
println(myTuple._1)

// Key/value pair: 'a -> b' is just another way to write the two-element tuple (a, b)
val myKV: (String, String) = "house" -> "Haus"
println(myKV._2)  // "Haus"
 
// Lists: all elements share one type.
val inventory: List[String] = List("Apples", "Oranges", "Bananas")
println(inventory(0))    // access by index; lists are *zero-based*
println(inventory.head)  // the first element
println(inventory.tail)  // a list of everything after the first element

// The generator '<-' walks over the elements of any collection, not only ranges
for (item <- inventory) println(item)

Working with lists

// Mapping: apply a function to every element and collect the results in a new list
val myList = List("I", "see", "a", "little", "silhouetto", "of", "a", "man")
val reversedStrings = myList.map(_.reverse)
reversedStrings.foreach(println)
 
// Reducing: combine the elements pairwise until a single value is left
val intList = List(1, 2, 3, 4, 5)
val sum = intList.reduce((acc: Int, next: Int) => acc + next)
println(sum)
intList.reduce(_ + _)  // Shortcut

// Filtering: keep only the elements for which the predicate is true
intList.filter((n: Int) => n != 3)
intList.filter(_ != 3)  // A shortcut: Wildcard syntax

// Iterating: run a side effect for every element
intList.foreach(n => println(n))
intList.foreach(println)  // same

// Concatenating lists: '++' returns a new list and leaves both inputs unchanged
val newList = List(6, 7, 8)
val bigList = intList ++ newList

// Some list methods
bigList.reverse      // elements in reverse order
bigList.sorted       // elements in ascending order
bigList.distinct     // duplicates removed
bigList.max          // largest element
bigList.sum          // sum of all elements
bigList.contains(3)  // true if 3 is an element

Maps

// Like dictionaries in Python

// Declared as 'var' so that '+=' below can rebind the name to an extended map:
// the default Map is immutable and has no in-place '+='.
var EngGer = Map("House" -> "Haus", "Bench" -> "Bank", "Bank" -> "Bank")
println(EngGer("Bench"))  // looking up a missing key this way throws a NoSuchElementException

println(EngGer.contains("Tree"))
val TreeInGerman = EngGer.getOrElse("Tree", "Unknown")  // safe lookup with a fallback value

// Adding entries:
EngGer += ("Car" -> "Auto")  // shorthand for: EngGer = EngGer + ("Car" -> "Auto")