Research Article

Big Data Analytics for the ATLAS EventIndex Project with Apache Spark

Algorithm 1

Overlap calculation algorithm pseudocode.
// Step 1
method map1 (events)
 for all x in events do
  emit(x.EventId, x.Stream)
// Step 2
method reduce1 (EventId, Stream [s1, s2, ...])
 EventStreams <= new AssociativeArray
 for all stream in [s1, s2, ...] do
  EventStreams{stream} <= true
 emit (EventId, EventStreams)
// Step 3
AllStreamsList <= new List(s1, s2, ..., sn)
method map2 (EventId, EventStreams)
 numStreams <= length(AllStreamsList)
 for i in 1 to numStreams do
  for j in i+1 to numStreams do
   streamI <= AllStreamsList[i]
   streamJ <= AllStreamsList[j]
   isInI <= true if EventStreams{streamI} exists
   isInJ <= true if EventStreams{streamJ} exists
   if isInI or isInJ do
    emit (pair(streamI, streamJ), pair(isInI, isInJ))
// Step 4
method reduce2 (Pair(StreamI, StreamJ), Pair(isInI, isInJ) [p1, p2, ...])
 events_stream1_only <= 0
 events_stream2_only <= 0
 events_both_streams <= 0
 ratio <= 0
 for all p(i, j) in [p1, p2, ...] do
  case (true, false) : events_stream1_only <= events_stream1_only + 1
  case (false, true) : events_stream2_only <= events_stream2_only + 1
  case (true, true) : events_both_streams <= events_both_streams + 1
 ratio <= events_both_streams /
  (events_stream1_only + events_stream2_only + events_both_streams)
 emit (pair(StreamI, StreamJ), events_stream1_only,
  events_stream2_only, events_both_streams, ratio)