# Build / SparkR-only (master, 4.4.2, windows-2022) #318

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Display name of the workflow.
name: "Build / SparkR-only (master, 4.4.2, windows-2022)"

# Triggers: a daily schedule plus manual dispatch.
on:
  schedule:
    # Every day at 17:00 (GitHub Actions evaluates cron schedules in UTC).
    - cron: '0 17 * * *'
  # No inputs are declared; this only enables manual runs.
  workflow_dispatch:
jobs:
  # Single job: build Spark with the SparkR profile on a Windows runner, then
  # run the SparkR test suite against that build.
  build:
    name: "Build module: sparkr"
    runs-on: windows-2022
    timeout-minutes: 300
    # Run only when the workflow lives in the apache/spark repository.
    if: github.repository == 'apache/spark'
    steps:
      # The winutils repository is checked out first so that the Hadoop
      # directory can be moved into the home directory before the Spark
      # repository is checked out in a later step.
      - name: Download winutils Hadoop binary
        uses: actions/checkout@v4
        with:
          repository: cdarlint/winutils
      - name: Move Hadoop winutil into home directory
        run: |
          Move-Item -Path hadoop-3.3.6 -Destination ~\
      - name: Checkout Spark repository
        uses: actions/checkout@v4
      - name: Cache Maven local repository
        uses: actions/cache@v4
        with:
          path: ~/.m2/repository
          # The key changes whenever any pom.xml changes; restore-keys lets an
          # older cache with the same prefix be reused as a starting point.
          key: build-sparkr-windows-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            build-sparkr-windows-maven-
      - name: Install Java 17
        uses: actions/setup-java@v4
        with:
          distribution: zulu
          # Quoted so the version is passed as a string, not a YAML integer.
          java-version: '17'
      - name: Install R 4.4.2
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: '4.4.2'
      - name: Install R dependencies
        # The second Rscript call prints the installed non-base packages and
        # their versions into the job log.
        run: |
          Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'xml2'), repos='https://cloud.r-project.org/')"
          Rscript -e "pkg_list <- as.data.frame(installed.packages()[,c(1, 3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]"
        shell: cmd
      # SparkR build does not need Python. However, it shows warnings when the Python version is too low during
      # the attempt to look up Python Data Sources for session initialization. The Windows 2019 runner
      # includes Python 3.7, which Spark does not support. Therefore, we simply install the proper Python
      # for simplicity, see SPARK-47116.
      # NOTE(review): this job runs on windows-2022; confirm whether the remark
      # about the Windows 2019 runner still applies.
      - name: Install Python 3.11
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          architecture: x64
      - name: Build Spark
        run: |
          rem 1. '-Djna.nosys=true' is required to avoid kernel32.dll load failure.
          rem See SPARK-28759.
          rem 2. Ideally we should check the tests related to Hive in SparkR as well (SPARK-31745).
          rem 3. setup-java installs Maven 3.8.7 but does not allow changing its version, so overwrite
          rem Maven version as a workaround.
          mvn -DskipTests -Psparkr -Djna.nosys=true package -Dmaven.version=3.8.7
        shell: cmd
      - name: Run SparkR tests
        # HADOOP_HOME points at the winutils directory moved into the home
        # directory by the earlier step.
        run: |
          set HADOOP_HOME=%USERPROFILE%\hadoop-3.3.6
          set PATH=%HADOOP_HOME%\bin;%PATH%
          .\bin\spark-submit2.cmd --driver-java-options "-Dlog4j.configurationFile=file:///%CD:\=/%/R/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" R\pkg\tests\run-all.R
        shell: cmd
        env:
          # Environment variables are always strings; quote the values so the
          # YAML type matches what the process actually receives.
          NOT_CRAN: "true"
          SPARKR_SUPPRESS_DEPRECATION_WARNING: "1"
          # See SPARK-27848. Currently installing some dependent packages causes
          # "(converted from warning) unable to identify current timezone 'C':" for an unknown reason.
          # This environment variable works around to test SparkR against a higher version.
          R_REMOTES_NO_ERRORS_FROM_WARNINGS: "true"