% Conference paper from the Euro-Par 2016 proceedings (Lecture Notes in
% Computer Science series). The citation key is the exporter-generated
% identifier and is kept unchanged so existing citations keep resolving.
% Proper nouns and acronyms are brace-protected against style recasing.
% TODO(review): the LNCS volume number is not present in this record;
% add a `volume` field once confirmed against the publisher page.
@inproceedings{1af0c1b1df704688969067ce16323b37,
  author    = {Salucci, Luca and Bonetta, Daniele and Binder, Walter},
  title     = {Lightweight multi-language bindings for {Apache} {Spark}},
  booktitle = {Parallel Processing -- 22nd International Conference on Parallel and Distributed Computing, {Euro-Par} 2016, Proceedings},
  editor    = {Dutot, P.-F. and Trystram, D.},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  year      = {2016},
  pages     = {281--292},
  isbn      = {9783319436586},
  doi       = {10.1007/978-3-319-43659-3_21},
  language  = {English},
  note      = {Conference date: 24--26 August 2016},
  abstract  = {Apache Spark has emerged as one of the most prominent frameworks for distributed high-performance data analysis. Among Spark{\textquoteright}s most appealing features are its bindings for dynamic languages such as Python and R. Despite of the great flexibility of such languages, they often cannot match the performance of statically typed languages such as Java or Scala. However, this limitation is not only due to the intrinsic nature of dynamically typed languages. Largely, the performance gap is caused by the way the language runtimes interact with Spark. In this paper we describe a new approach to integrating Python and R into data-intensive Spark applications. Our approach significantly reduces the performance gap between such languages and their statically typed counterpart, making dynamic languages an attractive alternative for the implementation of big-data applications.},
}