From 7fe25ffdcbdc57f1daa240242c0b3d4c8edb31b3 Mon Sep 17 00:00:00 2001 From: AmroAlJundi Date: Thu, 1 Dec 2022 15:27:44 +0300 Subject: [PATCH 1/3] Updated library version in /CMakeLists.txt --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 02eb2a26..edd54713 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ else() cmake_policy(VERSION 3.18) endif() -project( SparseBase_project VERSION 0.2.4 ) +project( SparseBase_project VERSION 0.3.0 ) option(RUN_TESTS "Enable running tests" OFF) option(_HEADER_ONLY "Use the library as a header only library?" OFF) option(USE_CUDA "Enable CUDA" OFF) From 0a8c7901f5caf6bd2a7a6c22217236c7726e41a1 Mon Sep 17 00:00:00 2001 From: AmroAlJundi Date: Fri, 2 Dec 2022 11:57:48 +0300 Subject: [PATCH 2/3] Set Ubuntu GitHub actions to run on Ubuntu 22.04 --- .github/workflows/docs_release.yml | 2 +- .github/workflows/testing.yml | 6 +++--- CMakeLists.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml index 3138bff7..a0abff9b 100644 --- a/.github/workflows/docs_release.yml +++ b/.github/workflows/docs_release.yml @@ -20,7 +20,7 @@ jobs: # This workflow contains a single job called "build" build: # The type of runner that the job will run on - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 # Steps represent a sequence of tasks that will be executed as part of the job diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index b6edecae..45fc4e1e 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -77,7 +77,7 @@ jobs: run: ctest -V test_ubuntu: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 @@ -115,7 +115,7 @@ jobs: test_header_only: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 @@ -152,7 +152,7 @@ jobs: test_opt_dep: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 diff --git a/CMakeLists.txt b/CMakeLists.txt index edd54713..e98b35f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ else() cmake_policy(VERSION 3.18) endif() -project( SparseBase_project VERSION 0.3.0 ) +project( SparseBase_project VERSION 0.3.1 ) option(RUN_TESTS "Enable running tests" OFF) option(_HEADER_ONLY "Use the library as a header only library?" OFF) option(USE_CUDA "Enable CUDA" OFF) From df3122e69c10de6fe85a627fbb7646b69b0b43cb Mon Sep 17 00:00:00 2001 From: Sinan Ekmekcibasi Date: Thu, 8 Dec 2022 15:24:54 +0300 Subject: [PATCH 3/3] initial commit --- examples/CMakeLists.txt | 47 +- examples/download_module/CMakeLists.txt | 2 + examples/download_module/download_module.cc | 51 ++ src/sparsebase/io/download_extract.cc | 758 ++++++++++++++++++++ src/sparsebase/io/download_extract.h | 77 ++ src/sparsebase/utils/utils.cc | 66 ++ src/sparsebase/utils/utils.h | 21 + 7 files changed, 999 insertions(+), 23 deletions(-) create mode 100644 examples/download_module/CMakeLists.txt create mode 100644 examples/download_module/download_module.cc create mode 100644 src/sparsebase/io/download_extract.cc create mode 100644 src/sparsebase/io/download_extract.h diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 841e97dc..d1589d0d 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,31 +1,32 @@ configure_file( - ${CMAKE_CURRENT_SOURCE_DIR}/run_all_examples.py - ${CMAKE_CURRENT_BINARY_DIR}/run_all_examples.py - COPYONLY + ${CMAKE_CURRENT_SOURCE_DIR}/run_all_examples.py + ${CMAKE_CURRENT_BINARY_DIR}/run_all_examples.py + COPYONLY ) file(COPY ${CMAKE_SOURCE_DIR}/examples/data DESTINATION ${CMAKE_BINARY_DIR}/examples/) if (${USE_CUDA}) - add_subdirectory(cuda_example) - add_subdirectory(array_example) - add_subdirectory(example_experiment) -endif() -if(NOT ${_HEADER_ONLY} OR NOT ${USE_CUDA}) - add_subdirectory(csr_coo) - add_subdirectory(degree_order) - add_subdirectory(custom_order) - add_subdirectory(format_conversion) - add_subdirectory(rcm_order) - add_subdirectory(sparse_feature) - add_subdirectory(sparse_reader) - add_subdirectory(custom_format) - add_subdirectory(custom_converter) - add_subdirectory(custom_experiment) - add_subdirectory(linear_solver) + add_subdirectory(cuda_example) + add_subdirectory(array_example) + add_subdirectory(example_experiment) +endif () +if (NOT ${_HEADER_ONLY} OR NOT ${USE_CUDA}) + add_subdirectory(csr_coo) + add_subdirectory(degree_order) + add_subdirectory(custom_order) + add_subdirectory(format_conversion) + add_subdirectory(rcm_order) + add_subdirectory(sparse_feature) + add_subdirectory(sparse_reader) + add_subdirectory(custom_format) + add_subdirectory(custom_converter) + add_subdirectory(custom_experiment) + add_subdirectory(linear_solver) + add_subdirectory(download_module) - if(${USE_METIS}) - add_subdirectory(metis_partition) - endif() + if (${USE_METIS}) + add_subdirectory(metis_partition) + endif () -endif() +endif () diff --git a/examples/download_module/CMakeLists.txt b/examples/download_module/CMakeLists.txt new file mode 100644 index 00000000..ce54d113 --- /dev/null +++ b/examples/download_module/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable(download_module ${CMAKE_CURRENT_SOURCE_DIR}/download_module.cc) +target_link_libraries(download_module sparsebase) diff --git a/examples/download_module/download_module.cc b/examples/download_module/download_module.cc new file mode 100644 index 00000000..ba455b2a --- /dev/null +++ b/examples/download_module/download_module.cc @@ -0,0 +1,51 @@ +#include + +#include +#include + +#include "sparsebase/format/format.h" + +using namespace std; + +int main(int argc, char *argv[]) { + unordered_map alternative_res; + + sparsebase::io::SuiteSparseDownloader ssd( + "https://suitesparse-collection-website.herokuapp.com/MM/Meszaros/" + "p010.tar.gz"); + alternative_res = ssd.download(); + + for (auto &res : alternative_res) { + auto format = res.second->get_dimensions()[0]; + cout << res.first << " -- " << format << "\n"; + } + + sparsebase::io::SnapDownloader sd( + "http://snap.stanford.edu/data/p2p-Gnutella05.txt.gz"); + + alternative_res = sd.download(); + + for (auto &res : alternative_res) { + auto format = res.second->get_dimensions()[0]; + cout << res.first << " -- " << format << "\n"; + } + + sparsebase::io::NetworkRepositoryDownloader nrd( + "https://nrvis.com/download/data/asn/" + "aves-barn-swallow-contact-network.zip"); + alternative_res = nrd.download(); + + for (auto &res : alternative_res) { + auto format = res.second->get_dimensions()[0]; + cout << res.first << " -- " << format << "\n"; + } + + sparsebase::io::NetworkRepositoryDownloader nrd2( + "https://nrvis.com/download/data/misc/1138_bus.zip"); + alternative_res = nrd2.download(); + + for (auto &res : alternative_res) { + auto format = res.second->get_dimensions()[0]; + cout << res.first << " -- " << format << "\n"; + } +} \ No newline at end of file diff --git a/src/sparsebase/io/download_extract.cc b/src/sparsebase/io/download_extract.cc new file mode 100644 index 00000000..2536e1a0 --- /dev/null +++ b/src/sparsebase/io/download_extract.cc @@ -0,0 +1,758 @@ +#include "sparsebase/io/download_extract.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sparsebase/format/format.h" +#include "sparsebase/io/edge_list_reader.h" +#include "sparsebase/io/mtx_reader.h" +#include "sparsebase/io/reader.h" +#include "sparsebase/utils/utils.h" + +#define WIN 0 +#define MACOSX 0 +#define LINUX 0 +#define UNIX 0 + +#ifdef _WIN64 +#define WIN 1 +#elif _WIN32 +#define WIN 1 +#elif __APPLE__ +#define MACOSX 1 +#elif __linux +#define LINUX 1 +#elif __unix +#define UNIX 1 +#endif + +typedef unsigned int ui; + +namespace sparsebase { + +namespace io { +/// +DownloadExtractModule::DownloadExtractModule(std::string fileUrl) + : downloadUrl(fileUrl) {} + +std::string DownloadExtractModule::returnFileName() { + std::string url = this->downloadUrl; + + int i = url.size() - 1; + + for (; i >= 0; i--) { + if (url[i] == '/') { + break; + } + } + this->filePath = url.substr(i + 1, url.size() - 1); + return this->filePath; +} + +int DownloadExtractModule::returnFileType(std::string filePath) { + int i = 0; + std::string fileExtension; + for (; i < filePath.size(); i++) { + if (filePath[i] == '.') break; + } + fileExtension = filePath.substr(i + 1, filePath.size() - 1); + + if (fileExtension == "tar.gz") { + return 1; + } else if (fileExtension == "gz" || fileExtension == "txt.gz") { + return 2; + } else if (fileExtension == "zip") { + return 3; + } else if (fileExtension == "7z") { + return 4; + } else if (fileExtension == "tgz") { + return 5; + } else { + throw std::invalid_argument("Unsupported archive type!"); + } + + // if it is not a supported file type + return -1; +} + +bool Downloader::download(const std::string &url, const std::string &filePath) { + std::system(("curl -L -o " + filePath + " " + url).c_str()); + + return true; +} + +int Extractor::extract(const std::string &filename) { + std::string folderName; + + for (int i = 0; i < filename.size(); i++) { + if (filename[i] == '.') { + folderName = filename.substr(0, i); + break; + } + } + + int res = returnFileType(filename); + if (res == 1 || res == 5) { + std::system(("tar xzvf " + filename).c_str()); + } else if (res == 2) { + std::system(("mkdir ")); + std::system(("gzip -d " + filename + " -k").c_str()); + } else if (res == 3) { + std::system(("unzip " + filename + " -d " + folderName).c_str()); + } + + if (MACOSX or UNIX or LINUX) { + std::system(("rm " + filename).c_str()); + } else if (WIN) { + std::system(("del " + filename).c_str()); + } + + return 1; +} + +/// +SuiteSparseDownloader::SuiteSparseDownloader(std::string matrixName, + std::string location) + : matrixName(matrixName), matrixLocation(location) {} + +std::unordered_map +SuiteSparseDownloader::download() { + std::string downloadUrl; + std::string matrixGroupName; + std::string matrixName; + std::string filepath; + std::unordered_map alternative_res; + + // Check if given argument is url or just group/matrixName + std::regex regexURL( + "\\b((?:https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:, " + ".;]*[-a-zA-Z0-9+&@#/%=~_|])"); + if (std::regex_match(this->matrixName.c_str(), regexURL)) { + downloadUrl = this->matrixName; + } else { + downloadUrl = "https://suitesparse-collection-website.herokuapp.com/MM/" + + this->matrixName + ".tar.gz"; + } + + // Extract matrix name and matrix group name to separate variables + // Whether given argument is url or just group/matrixName + int temp = 0; + for (int i = downloadUrl.size() - 1, j = 0; i >= 0; i--) { + if (downloadUrl[i] == '/') { + if (j == 0) { + filepath = downloadUrl.substr(i + 1, downloadUrl.size() - i - 1); + temp = i; + j = 1; + } else { + matrixGroupName = downloadUrl.substr(i + 1, temp - i - 1); + break; + } + } + } + + // Extracting matrix name from .tar.gz + for (int i = 0; i < filepath.size(); i++) { + if (filepath[i] == '.') { + matrixName = filepath.substr(0, i); + break; + } + } + + // Set the location for files. + // If user did not give any specific location, then files will be located + // in user's HOME directory. + if (this->matrixLocation == "DEFAULT") { + const char *homedir; + + if ((homedir = std::getenv("HOME")) == NULL) { + homedir = getpwuid(getuid())->pw_dir; + } + std::filesystem::current_path(homedir); + } else { + std::filesystem::current_path(this->matrixLocation); + } + + // SparseBase/ directory + std::filesystem::path pForSparseBase("SparseBase"); + if (!(std::filesystem::exists(pForSparseBase))) { + std::filesystem::create_directory(pForSparseBase); + } + std::filesystem::current_path(pForSparseBase); + + // SparseBase/cache directory + std::filesystem::path pForCache("cache"); + if (!(std::filesystem::exists(pForCache))) { + std::filesystem::create_directory(pForCache); + } + std::filesystem::current_path(pForCache); + + // SparseBase/cache/SuiteSparse directory + std::filesystem::path pForSuiteSparse("SuiteSparse"); + if (!(std::filesystem::exists(pForSuiteSparse))) { + std::filesystem::create_directory(pForSuiteSparse); + } + std::filesystem::current_path(pForSuiteSparse); + + // SparseBase/cache/SuiteSparse/ directory + std::filesystem::path pForMatrixGroup(matrixGroupName); + if (!(std::filesystem::exists(pForMatrixGroup))) { + std::filesystem::create_directory(pForMatrixGroup); + } + std::filesystem::current_path(pForMatrixGroup); + + // SparseBase/cache/SuiteSparse// directory + std::filesystem::path pForMatrix(matrixName); + if (!(std::filesystem::exists(pForMatrix))) { + if (Downloader::download(downloadUrl, filepath)) { + int res = Extractor::extract(filepath); + if (res == -1) { + throw std::runtime_error("Error in unarchiving files!"); + } + } else { + std::cout << "Something went wrong!" + << "\n"; + } + } + std::filesystem::current_path(pForMatrix); + + std::filesystem::path result_path(std::filesystem::current_path()); + + for (auto &entry : std::filesystem::directory_iterator(result_path)) { + std::string filename; + std::string _path = entry.path().u8string(); + for (int i = _path.size(); i >= 0; i--) { + if (_path[i] == '/') { + filename = _path.substr(i + 1, _path.size() - i); + break; + } + } + + filename = _path; + std::ifstream fin(_path); + if (fin.is_open()) { + std::string line, buf; + std::getline(fin, line); + + sparsebase::utils::MatrixMarket::MTXOptions options; + options = sparsebase::utils::MatrixMarket::ParseHeader(line); + + if (options.field == 0) { + sparsebase::io::MTXReader reader(_path); + + if (options.format == 0) { + format::COO *coo = reader.ReadCOO(); + auto *_format = coo; + alternative_res.insert({filename, _format}); + } else { + format::Array *arr = reader.ReadArray(); + format::Format *_format = arr; + alternative_res.insert({filename, _format}); + } + } else if (options.field == 1) { + sparsebase::io::MTXReader reader(_path); + if (options.format == 0) { + format::COO *coo = reader.ReadCOO(); + format::Format *_format = coo; + alternative_res.insert({filename, _format}); + } else { + format::Array *arr = reader.ReadArray(); + format::Format *_format = arr; + alternative_res.insert({filename, _format}); + } + } else if (options.field == 3) { + sparsebase::io::MTXReader reader(_path); + if (options.format == 0) { + format::COO *coo = reader.ReadCOO(); + format::Format *_format = coo; + alternative_res.insert({filename, _format}); + } else { + format::Array *arr = reader.ReadArray(); + format::Format *_format = arr; + alternative_res.insert({filename, _format}); + } + } + } + fin.close(); + } + return alternative_res; +} + +NetworkRepositoryDownloader::NetworkRepositoryDownloader(std::string matrixName, + std::string location) + : matrixName(matrixName), matrixLocation(location) {} + +void NetworkRepositoryDownloader::changePermission(std::string &path) { + struct stat st; + if (stat(path.c_str(), &st) == 0) { + mode_t perm = st.st_mode; + if (!(perm & S_IRUSR)) { + chmod(path.c_str(), S_IRUSR); + std::cout << "READ PERMISSION ENABLED!" + << "\n"; + } + } +} + +std::unordered_map +NetworkRepositoryDownloader::download() { + std::string downloadUrl; + std::string matrixGroupName; + std::string matrixName; + std::string filepath; + std::unordered_map alternative_res; + + // Check if given argument is url or just group/matrixName + std::regex regexURL( + "\\b((?:https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:, " + ".;]*[-a-zA-Z0-9+&@#/%=~_|])"); + if (std::regex_match(this->matrixName.c_str(), regexURL)) { + downloadUrl = this->matrixName; + } else { + //TODO:Construct links from a given matrix name, if possible + throw std::runtime_error("Please provide a link!"); + } + + int temp = 0; + for (int i = downloadUrl.size() - 1, j = 0; i >= 0; i--) { + if (downloadUrl[i] == '/') { + if (j == 0) { + filepath = downloadUrl.substr(i + 1, downloadUrl.size() - i - 1); + temp = i; + j = 1; + } else { + matrixGroupName = downloadUrl.substr(i + 1, temp - i - 1); + break; + } + } + } + + // Extracting matrix name from .tar.gz + for (int i = 0; i < filepath.size(); i++) { + if (filepath[i] == '.') { + matrixName = filepath.substr(0, i); + break; + } + } + + // Set the location for files. + // If user did not give any specific location, then files will be located + // in user's HOME directory. + if (this->matrixLocation == "DEFAULT") { + const char *homedir; + + if ((homedir = std::getenv("HOME")) == NULL) { + homedir = getpwuid(getuid())->pw_dir; + } + std::filesystem::current_path(homedir); + } else { + std::filesystem::current_path(this->matrixLocation); + } + + // SparseBase/ directory + std::filesystem::path pForSparseBase("SparseBase"); + if (!(std::filesystem::exists(pForSparseBase))) { + std::filesystem::create_directory(pForSparseBase); + } + std::filesystem::current_path(pForSparseBase); + + // SparseBase/cache directory + std::filesystem::path pForCache("cache"); + if (!(std::filesystem::exists(pForCache))) { + std::filesystem::create_directory(pForCache); + } + std::filesystem::current_path(pForCache); + + // SparseBase/cache/SuiteSparse directory + std::filesystem::path pForNetworkRepository("NetworkRepository"); + if (!(std::filesystem::exists(pForNetworkRepository))) { + std::filesystem::create_directory(pForNetworkRepository); + } + std::filesystem::current_path(pForNetworkRepository); + + // SparseBase/cache/NetworkRepository/ directory + std::filesystem::path pForMatrixGroup(matrixGroupName); + if (!(std::filesystem::exists(pForMatrixGroup))) { + std::filesystem::create_directory(pForMatrixGroup); + } + std::filesystem::current_path(pForMatrixGroup); + + // SparseBase/cache/NetworkRepository// directory + std::filesystem::path pForMatrix(matrixName); + if (!(std::filesystem::exists(pForMatrix))) { + if (Downloader::download(downloadUrl, filepath)) { + int res = Extractor::extract(filepath); + } else { + std::cout << "Something went wrong!" + << "\n"; + } + } + std::filesystem::current_path(pForMatrix); + + std::filesystem::path result_path(std::filesystem::current_path()); + std::string result_path_string{result_path.u8string()}; + + std::unordered_map resulting_paths; + + for (auto &entry : std::filesystem::directory_iterator(result_path)) { + std::string filename; + std::string _path = entry.path().u8string(); + std::string fileExtension; + for (int i = _path.size(); i >= 0; i--) { + if (_path[i] == '/') { + filename = _path.substr(i + 1, _path.size() - i); + break; + } + } + + for (int i = filename.size(); i >= 0; i--) { + if (filename[i] == '.') { + fileExtension = filename.substr(i + 1, filename.size() - i); + break; + } + } + + filename = _path; + NetworkRepositoryDownloader::changePermission(_path); + std::ifstream fin(_path); + if (fin.is_open()) { + if (fileExtension == "mtx") { + std::string line, buf; + std::getline(fin, line); + + sparsebase::utils::MatrixMarket::MTXOptions options; + options = sparsebase::utils::MatrixMarket::ParseHeader(line); + if (options.field == 0) { + sparsebase::io::MTXReader reader(_path); + + if (options.format == 0) { + format::COO *coo = reader.ReadCOO(); + auto *_format = coo; + alternative_res.insert({filename, _format}); + } else { + format::Array *arr = reader.ReadArray(); + format::Format *_format = arr; + alternative_res.insert({filename, _format}); + } + } else if (options.field == 1) { + sparsebase::io::MTXReader reader(_path); + if (options.format == 0) { + format::COO *coo = reader.ReadCOO(); + format::Format *_format = coo; + alternative_res.insert({filename, _format}); + } else { + format::Array *arr = reader.ReadArray(); + format::Format *_format = arr; + alternative_res.insert({filename, _format}); + } + } else if (options.field == 3) { + sparsebase::io::MTXReader reader(_path); + if (options.format == 0) { + format::COO *coo = reader.ReadCOO(); + format::Format *_format = coo; + alternative_res.insert({filename, _format}); + } else { + format::Array *arr = reader.ReadArray(); + format::Format *_format = arr; + alternative_res.insert({filename, _format}); + } + } + } else if (fileExtension == "edges") { + bool weighted = false; + std::string line, buf; + std::getline(fin, line); + std::istringstream stream(line); + int counter = 0; + while (stream >> buf) { + counter++; + } + if (counter == 3) { + weighted = true; + } + + sparsebase::io::EdgeListReader edgeListReader(_path, + weighted); + format::CSR *csr = edgeListReader.ReadCSR(); + auto *_format = csr; + alternative_res.insert({filename, _format}); + } + } + fin.close(); + } + return alternative_res; +} + +SnapDownloader::SnapDownloader(std::string matrixName, std::string location) + : matrixName(matrixName), matrixLocation(location) {} + +//TODO:Do not this method, instead add check to the EdgeListReader +void SnapDownloader::cleanFile(std::string path) { + std::string filename = path; + std::ifstream edgeFile(path); + + std::ofstream temp; + temp.open("temp.txt"); + + if (edgeFile.is_open()) { + std::string line; + while (std::getline(edgeFile, line)) { + if (line[0] != '#') { + temp << line << std::endl; + } + } + } + edgeFile.close(); + temp.close(); + std::rename("temp.txt", filename.c_str()); +} + +std::unordered_map SnapDownloader::download() { + std::string downloadUrl; + std::string matrixGroupName; + std::string matrixName; + std::string filepath; + std::unordered_map alternative_res; + + // Check if given argument is url or just group/matrixName + std::regex regexURL( + "\\b((?:https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:, " + ".;]*[-a-zA-Z0-9+&@#/%=~_|])"); + if (std::regex_match(this->matrixName.c_str(), regexURL)) { + for (int i = this->matrixName.size(); i >= 0; i--) { + if (this->matrixName[i] == '.') { + if (this->matrixName.substr(i + 1, this->matrixName.size() - i - 1) == + "html") { + downloadUrl = this->matrixName.substr( + 0, this->matrixName.size() - + (this->matrixName.size() - i - 1)) + + "txt.gz"; + } else { + downloadUrl = this->matrixName; + } + } + } + } else { + throw std::invalid_argument("SnapDownloader only excepts links!"); + } + + // Extract matrix name and matrix group name to separate variables + // Whether given argument is url or just group/matrixName + int temp = 0; + for (int i = downloadUrl.size() - 1, j = 0; i >= 0; i--) { + if (downloadUrl[i] == '/') { + if (j == 0) { + filepath = downloadUrl.substr(i + 1, downloadUrl.size() - i - 1); + temp = i; + j = 1; + } else { + matrixGroupName = downloadUrl.substr(i + 1, temp - i - 1); + break; + } + } + } + + // Extracting matrix name from .tar.gz + for (int i = 0; i < filepath.size(); i++) { + if (filepath[i] == '.') { + matrixName = filepath.substr(0, i); + break; + } + } + + // Set the location for files. + // If user did not give any specific location, then files will be located + // in user's HOME directory. + if (this->matrixLocation == "DEFAULT") { + const char *homedir; + + if ((homedir = std::getenv("HOME")) == NULL) { + homedir = getpwuid(getuid())->pw_dir; + } + std::filesystem::current_path(homedir); + } else { + std::filesystem::current_path(this->matrixLocation); + } + + // SparseBase/ directory + std::filesystem::path pForSparseBase("SparseBase"); + if (!(std::filesystem::exists(pForSparseBase))) { + std::filesystem::create_directory(pForSparseBase); + } + std::filesystem::current_path(pForSparseBase); + + // SparseBase/cache directory + std::filesystem::path pForCache("cache"); + if (!(std::filesystem::exists(pForCache))) { + std::filesystem::create_directory(pForCache); + } + std::filesystem::current_path(pForCache); + + // SparseBase/cache/SuiteSparse directory + std::filesystem::path pForSnap("Snap"); + if (!(std::filesystem::exists(pForSnap))) { + std::filesystem::create_directory(pForSnap); + } + std::filesystem::current_path(pForSnap); + + // SparseBase/cache/SuiteSparse/ directory + std::filesystem::path pForMatrixGroup(matrixGroupName); + if (!(std::filesystem::exists(pForMatrixGroup))) { + std::filesystem::create_directory(pForMatrixGroup); + } + std::filesystem::current_path(pForMatrixGroup); + + // SparseBase/cache/SuiteSparse// directory + std::filesystem::path pForMatrix(matrixName + ".txt"); + if (!(std::filesystem::exists(pForMatrix))) { + if (Downloader::download(downloadUrl, filepath)) { + int res = Extractor::extract(filepath); + if (res == -1) { + throw std::runtime_error("Error in unarchiving files!"); + } + } else { + throw std::invalid_argument("Something went wrong!"); + } + } + + this->cleanFile(pForMatrix.string()); + + sparsebase::io::EdgeListReader reader(pForMatrix.string()); + format::CSR *csr = reader.ReadCSR(); + auto *_format = csr; + alternative_res.insert( + {std::filesystem::absolute(pForMatrix).string(), _format}); + + return alternative_res; +} +/* +FrosttDownloader::FrosttDownloader(std::string matrixName, std::string location) + : matrixName(matrixName), matrixLocation(location) {} + +std::unordered_map FrosttDownloader::download() { + std::string downloadUrl; + std::string matrixGroupName; + std::string matrixName; + std::string filepath; + std::unordered_map alternative_res; + + // Check if given argument is url or just group/matrixName + std::regex regexURL( + "\\b((?:https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:, " + ".;]*[-a-zA-Z0-9+&@#/%=~_|])"); + + if (std::regex_match(this->matrixName.c_str(), regexURL)) { + for (int i = this->matrixName.size(); i >= 0; i--) { + if (this->matrixName[i] == '.') { + if (this->matrixName.substr(i + 1, this->matrixName.size() - i - 1) == + "html") { + downloadUrl = this->matrixName.substr( + 0, this->matrixName.size() - + (this->matrixName.size() - i - 1)) + + "txt.gz"; + } else { + downloadUrl = this->matrixName; + } + } + } + } else { + throw std::invalid_argument("Frostt only excepts links!"); + } + + // Extract matrix name and matrix group name to separate variables + // Whether given argument is url or just group/matrixName + int temp = 0; + for (int i = downloadUrl.size() - 1, j = 0; i >= 0; i--) { + if (downloadUrl[i] == '/') { + if (j == 0) { + filepath = downloadUrl.substr(i + 1, downloadUrl.size() - i - 1); + temp = i; + j = 1; + } else { + matrixGroupName = downloadUrl.substr(i + 1, temp - i - 1); + std::cout << downloadUrl.substr(i + 1, temp - i - 1) << "\n"; + break; + } + } + } + + // Extracting matrix name from .tar.gz + for (int i = 0; i < filepath.size(); i++) { + if (filepath[i] == '.') { + matrixName = filepath.substr(0, i); + break; + } + } + + // Set the location for files. + // If user did not give any specific location, then files will be located + // in user's HOME directory. + if (this->matrixLocation == "DEFAULT") { + const char *homedir; + if ((homedir = std::getenv("HOME")) == NULL) { + homedir = getpwuid(getuid())->pw_dir; + } + std::filesystem::current_path(homedir); + } else { + std::filesystem::current_path(this->matrixLocation); + } + + // SparseBase/ directory + std::filesystem::path pForSparseBase("SparseBase"); + if (!(std::filesystem::exists(pForSparseBase))) { + std::filesystem::create_directory(pForSparseBase); + } + std::filesystem::current_path(pForSparseBase); + + // SparseBase/cache directory + std::filesystem::path pForCache("cache"); + if (!(std::filesystem::exists(pForCache))) { + std::filesystem::create_directory(pForCache); + } + std::filesystem::current_path(pForCache); + + // SparseBase/cache/Frostt directory + std::filesystem::path pForSnap("Frostt"); + if (!(std::filesystem::exists(pForSnap))) { + std::filesystem::create_directory(pForSnap); + } + std::filesystem::current_path(pForSnap); + + // SparseBase/cache/Frostt/ directory + std::filesystem::path pForMatrixGroup(matrixGroupName); + if (!(std::filesystem::exists(pForMatrixGroup))) { + std::filesystem::create_directory(pForMatrixGroup); + } + std::filesystem::current_path(pForMatrixGroup); + + // SparseBase/cache/SuiteSparse// directory + std::filesystem::path pForMatrix(matrixName); + if (!(std::filesystem::exists(pForMatrix))) { + if (Downloader::download(downloadUrl, filepath)) { + int res = Extractor::extract(filepath); + if (res == -1) { + throw std::runtime_error("Error in unarchiving files!"); + } + } else { + throw std::invalid_argument("Something went wrong!"); + } + } + + sparsebase::io::EdgeListReader reader(pathForData); + format::CSR *csr = reader.ReadCSR(); + auto *_format = csr; + alternative_res.insert({pathForData, _format}); + + return alternative_res; +} + */ +} // namespace io +} // namespace sparsebase \ No newline at end of file diff --git a/src/sparsebase/io/download_extract.h b/src/sparsebase/io/download_extract.h new file mode 100644 index 00000000..2064be3d --- /dev/null +++ b/src/sparsebase/io/download_extract.h @@ -0,0 +1,77 @@ +#ifndef SPARSEBASE_SPARSEBASE_UTILS_IO_DOWNLOADEXTRACTMODULE_H_ +#define SPARSEBASE_SPARSEBASE_UTILS_IO_DOWNLOADEXTRACTMODULE_H_ + +#include +#include + +#include "sparsebase/format/format.h" +namespace sparsebase { + +namespace io { + +class DownloadExtractModule { + public: + std::string downloadUrl, filePath; + DownloadExtractModule(std::string fileUrl); + std::string returnFileName(); + static int returnFileType(std::string filePath); +}; + +class Downloader : public DownloadExtractModule { + public: + std::string deneme; + Downloader(); + static bool download(const std::string &url, const std::string &filePath); +}; + +class Extractor : public DownloadExtractModule { + public: + static int extract(const std::string &filename); +}; + +class SuiteSparseDownloader { + public: + std::string matrixName; + std::string matrixLocation; + SuiteSparseDownloader(std::string matrixName, + std::string location = "DEFAULT"); + + std::unordered_map download(); +}; + +class NetworkRepositoryDownloader { + public: + std::string matrixName; + std::string matrixLocation; + NetworkRepositoryDownloader(std::string matrixName, + std::string location = "DEFAULT"); + std::unordered_map download(); + static void changePermission(std::string &path); +}; + +class SnapDownloader { + public: + std::string matrixName; + std::string matrixLocation; + SnapDownloader(std::string matrixName, std::string location = "DEFAULT"); + + std::unordered_map download(); + void cleanFile(std::string path); +}; + +class FrosttDownloader { + public: + std::string matrixName; + std::string matrixLocation; + FrosttDownloader(std::string matrixName, std::string location = "DEFAULT"); + + std::unordered_map download(); +}; + +} // namespace io + +} // namespace sparsebase +#ifdef _HEADER_ONLY +#include "sparsebase/utils/io/download_extract.cc" +#endif +#endif // SPARSEBASE_SPARSEBASE_UTILS_IO_DOWNLOADEXTRACTMODULE_H_ \ No newline at end of file diff --git a/src/sparsebase/utils/utils.cc b/src/sparsebase/utils/utils.cc index 92d2d962..2e74dcb2 100644 --- a/src/sparsebase/utils/utils.cc +++ b/src/sparsebase/utils/utils.cc @@ -4,9 +4,12 @@ #include #include +#include #include "exception.h" +#define MMX_PREFIX "%%MatrixMarket" + namespace sparsebase::utils { std::size_t TypeIndexVectorHash::operator()( const std::vector& vf) const { @@ -28,4 +31,67 @@ std::string demangle(const std::string& name) { std::string demangle(std::type_index type) { return demangle(type.name()); } +namespace MatrixMarket { + +MTXOptions ParseHeader(std::string header_line) { + std::stringstream line_ss(header_line); + MTXOptions options; + std::string prefix, object, format, field, symmetry; + line_ss >> prefix >> object >> format >> field >> symmetry; + if (prefix != MMX_PREFIX) + // throw utils::ReaderException("Wrong prefix in a matrix market file"); + // parsing Object option + if (object == "matrix") { + options.object = MTXObjectOptions::matrix; + } else if (object == "vector") { + options.object = MTXObjectOptions::matrix; + throw utils::ReaderException( + "Matrix market reader does not currently support reading vectors."); + } else { + throw utils::ReaderException( + "Illegal value for the 'object' option in matrix market header"); + } + // parsing format option + if (format == "array") { + options.format = MTXFormatOptions::array; + } else if (format == "coordinate") { + options.format = MTXFormatOptions::coordinate; + } else { + throw utils::ReaderException( + "Illegal value for the 'format' option in matrix market header"); + } + // parsing field option + if (field == "real") { + options.field = MTXFieldOptions::real; + } else if (field == "double") { + options.field = MTXFieldOptions::double_field; + } else if (field == "complex") { + options.field = MTXFieldOptions::complex; + } else if (field == "integer") { + options.field = MTXFieldOptions::integer; + } else if (field == "pattern") { + options.field = MTXFieldOptions::pattern; + } else { + throw utils::ReaderException( + "Illegal value for the 'field' option in matrix market header"); + } + // parsing symmetry + if (symmetry == "general") { + options.symmetry = MTXSymmetryOptions::general; + } else if (symmetry == "symmetric") { + options.symmetry = MTXSymmetryOptions::symmetric; + } else if (symmetry == "skew-symmetric") { + options.symmetry = MTXSymmetryOptions::skew_symmetric; + } else if (symmetry == "hermitian") { + options.symmetry = MTXSymmetryOptions::hermitian; + throw utils::ReaderException( + "Matrix market reader does not currently support hermitian symmetry."); + } else { + throw utils::ReaderException( + "Illegal value for the 'symmetry' option in matrix market header"); + } + return options; +} +} // namespace MatrixMarket + } // namespace sparsebase::utils diff --git a/src/sparsebase/utils/utils.h b/src/sparsebase/utils/utils.h index 61461739..42ddc8f6 100644 --- a/src/sparsebase/utils/utils.h +++ b/src/sparsebase/utils/utils.h @@ -21,6 +21,27 @@ namespace sparsebase::utils { +namespace MatrixMarket { + +enum MTXObjectOptions { matrix, vector }; +enum MTXFormatOptions { coordinate, array }; +enum MTXFieldOptions { real, double_field, complex, integer, pattern }; +enum MTXSymmetryOptions { + general = 0, + symmetric = 1, + skew_symmetric = 2, + hermitian = 3 +}; +struct MTXOptions { + MTXObjectOptions object; + MTXFormatOptions format; + MTXFieldOptions field; + MTXSymmetryOptions symmetry; +}; +MTXOptions ParseHeader(std::string header_line); + +} // namespace MatrixMarket + //! Type used for calculating function costs typedef float CostType; // Thanks to artificial mind blog: