diff --git a/app/services/external_apis/ror_service.rb b/app/services/external_apis/ror_service.rb
index e639bf2b94..3c646105c9 100644
--- a/app/services/external_apis/ror_service.rb
+++ b/app/services/external_apis/ror_service.rb
@@ -81,9 +81,13 @@ def search(term:, filters: [])
       def query_ror(term:, page: 1, filters: [])
         return [] unless term.present?
 
+        # Percent-encode the term
+        # (HTTParty.get() throws InvalidURIError when given non-ASCII characters)
+        encoded_term = URI.encode_www_form_component(term)
+
         # build the URL
         target = "#{api_base_url}#{search_path}"
-        query = query_string(term: term, page: page, filters: filters)
+        query = query_string(term: encoded_term, page: page, filters: filters)
 
         # Call the ROR API and log any errors
         resp = http_get(uri: "#{target}?#{query}", additional_headers: {},
diff --git a/app/services/orgs/update_ror_service.rb b/app/services/orgs/update_ror_service.rb
new file mode 100644
index 0000000000..360f640504
--- /dev/null
+++ b/app/services/orgs/update_ror_service.rb
@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+
+require 'csv'
+
+module Orgs
+  # Service object that updates Org records with ROR and FundRef identifiers, and writes results to CSV
+  # Invoked by the `orgs:update_ror_data` Rake task (lib/tasks/orgs.rake)
+  module UpdateRorService
+    extend self
+
+    CSV_FILE_PATH = Rails.root.join('tmp', 'ror_fundref_ids.csv')
+    CSV_HEADERS = %w[org_id org_name ror_name ror_id fundref_id weight].freeze
+
+    # Entry point: searches ROR for every Org in scope, saves matched ROR/FundRef
+    # identifiers and writes the outcome for each processed Org to CSV_FILE_PATH.
+    # Does nothing when an identifier scheme is missing or the ROR heartbeat fails.
+    #
+    # @param update_existing [Boolean] when false (default), Orgs that already have a
+    #   ROR identifier are skipped; when true they are looked up and updated again
+    def run(update_existing: false)
+      ror, fundref = fetch_identifier_schemes
+      # Only proceed if the identifier schemes and the ROR API are all available
+      return unless ror && fundref && ror_service_available?
+
+      print_intro_message
+
+      CSV.open(CSV_FILE_PATH, 'w', write_headers: true, headers: CSV_HEADERS) do |csv|
+        org_scope.each { |org| process_org(org, ror, fundref, csv, update_existing: update_existing) }
+      end
+    end
+
+    private
+
+    # Looks up a single Org in ROR and records the outcome (DB update and/or CSV row)
+    def process_org(org, ror, fundref, csv, update_existing: false)
+      # If the Org already has a ROR identifier, skip it
+      return if !update_existing && org_has_ror_identifier?(org, ror)
+
+      # Array() tolerates a nil response; #find on an empty list simply returns nil
+      results = Array(ror_search_results_for_org(org))
+      result = best_match_from_results(results)
+      if result.present?
+        handle_matched_result(org, ror, fundref, result, csv)
+      else
+        handle_unmatched_result(org, csv)
+      end
+    end
+
+    # Loads the 'ror' and 'fundref' IdentifierSchemes, reporting any that are missing
+    #
+    # @return [Array] the pair [ror, fundref]; an entry is nil when not found
+    def fetch_identifier_schemes
+      ror = IdentifierScheme.find_by(name: 'ror')
+      fundref = IdentifierScheme.find_by(name: 'fundref')
+
+      if ror.nil? || fundref.nil?
+        puts "Missing IdentifierScheme(s): ror: #{ror.inspect}, fundref: #{fundref.inspect}"
+        puts 'Both must exist in DB for this task to run.'
+      end
+      [ror, fundref]
+    end
+
+    # Heartbeat check against the ROR API; prints a hint when it fails
+    def ror_service_available?
+      ok = ExternalApis::RorService.ping
+      unless ok
+        puts 'ROR appears to be offline or your configuration is invalid. ' \
+             'Heartbeat check failed. Refer to the log for more information.'
+      end
+      ok
+    end
+
+    # True when the Org already has an identifier belonging to the ROR scheme
+    def org_has_ror_identifier?(org, ror)
+      org.identifiers.any? { |id| id.identifier_scheme_id == ror.id }
+    end
+
+    # Explains to the operator what the task does and where the CSV is written
+    def print_intro_message
+      puts <<~MSG
+        Scanning ROR for each of your existing Orgs.
+        The results will be written to "#{CSV_FILE_PATH}" to facilitate#{' '}
+        review and any corrections that may need to be made.
+        The CSV file contains the Org name stored in your DB next to the ROR org#{' '}
+        name that was matched. Use these 2 values to determine if the match was valid.
+        You can use the ROR search page to find the correct match for any organizations#{' '}
+        that need to be corrected: https://ror.org/search
+
+      MSG
+    end
+
+    # Managed, non-"other" Orgs ordered by name, with their identifiers eager-loaded
+    def org_scope
+      scope = Org.includes(identifiers: :identifier_scheme)
+                 .where(managed: true, is_other: false)
+                 .order(:name)
+      puts "Found #{scope.size} org(s) to process."
+      scope
+    end
+
+    # Searches ROR (via OrgSelection::SearchService) using the Org's name
+    def ror_search_results_for_org(org)
+      # The abbreviation sometimes causes weird results so strip it off in this instance
+      org_name = org.name.gsub(" (#{org.abbreviation})", '')
+      OrgSelection::SearchService.search_externally(search_term: org_name)
+    end
+
+    def best_match_from_results(results)
+      # Find the best match
+      # (See OrgSelection::SearchService#weigh for how weight is calculated.)
+      results.find { |r| r[:weight].zero? } || results.find { |r| r[:weight] == 1 }
+    end
+
+    # Reports an Org without a usable match: console warning plus a CSV row with blank ROR columns
+    def handle_unmatched_result(org, csv)
+      puts "⚠️ No results found for Org with id: #{org.id} and name: #{org.name}"
+      csv << [org.id, org.name, nil, nil, nil, nil]
+    end
+
+    # Saves the identifiers found in `result` and records the match in the CSV.
+    # A match carrying neither a ROR nor a FundRef id still gets a CSV row (with
+    # blank id columns) so that no processed Org is silently missing from the report.
+    def handle_matched_result(org, ror, fundref, result, csv)
+      # Save ROR and FUNDREF entries to DB (handle_identifier skips blank ids)
+      identifiers = handle_identifiers(org, ror, fundref, result)
+      # Add entry to generated CSV
+      csv << [org.id, org.name, result[:name], identifiers[:ror]&.value, identifiers[:fundref]&.value, result[:weight]]
+    end
+
+    # Builds/saves both identifiers; each value is nil when the id was blank or not saved
+    def handle_identifiers(org, ror, fundref, result)
+      {
+        ror: handle_identifier(org, ror, result[:ror], result[:name], 'ROR'),
+        fundref: handle_identifier(org, fundref, result[:fundref], result[:name], 'FUNDREF')
+      }
+    end
+
+    # Creates or updates the Org's Identifier for the given scheme.
+    #
+    # @return [Identifier, nil] the saved identifier; nil when `id` is blank or the
+    #   save failed, so that the CSV never reports a value that was not persisted
+    def handle_identifier(org, identifier_scheme, id, name, label)
+      return unless id.present?
+
+      identifier = Identifier.find_or_initialize_by(identifiable: org,
+                                                    identifier_scheme: identifier_scheme)
+      identifier.update!(value: "#{identifier_scheme.identifier_prefix}#{id}")
+      puts "✅ Updated #{org.name} -> #{label}: #{identifier.value}, #{name}"
+      identifier
+    rescue StandardError => e
+      message = "❌ Failed to update #{org.name} -> #{label}: #{e.message}"
+      puts message
+      Rails.logger.error(message)
+      nil
+    end
+  end
+end
diff --git a/app/views/orgs/_external_identifiers.html.erb b/app/views/orgs/_external_identifiers.html.erb
index 010180c216..04d738ac95 100644
--- a/app/views/orgs/_external_identifiers.html.erb
+++ b/app/views/orgs/_external_identifiers.html.erb
@@ -17,7 +17,7 @@
<% id = presenter.id_for_scheme(scheme: scheme) %> <%= scheme.description %>: - <%= id_for_display(id: id) %> + <%= id_for_display(id: id, with_scheme_name: false) %>
<% end %> @@ -62,7 +62,7 @@
<% id = presenter.id_for_scheme(scheme: scheme) %> <%= scheme.description %>: - <%= id_for_display(id: id) %> + <%= id_for_display(id: id, with_scheme_name: false) %>
<% end %>
diff --git a/config/initializers/external_apis/ror.rb b/config/initializers/external_apis/ror.rb
index 2b37937234..c324638063 100644
--- a/config/initializers/external_apis/ror.rb
+++ b/config/initializers/external_apis/ror.rb
@@ -5,7 +5,7 @@
 # the API and to verify that your configuration settings are correct,
 # please refer to: https://github.com/ror-community/ror-api
 Rails.configuration.x.ror.landing_page_url = 'https://ror.org/'
-Rails.configuration.x.ror.api_base_url = 'https://api.ror.org/'
+Rails.configuration.x.ror.api_base_url = 'https://api.ror.org/v1/'
 Rails.configuration.x.ror.heartbeat_path = 'heartbeat'
 Rails.configuration.x.ror.search_path = 'organizations'
 Rails.configuration.x.ror.max_pages = 2
diff --git a/db/seeds.rb b/db/seeds.rb
index 21f84bdfb2..9d473d4170 100755
--- a/db/seeds.rb
+++ b/db/seeds.rb
@@ -33,6 +33,22 @@
     description: 'Your institutional credentials',
     active: true,
     context: 11
+  },
+  {
+    name: 'fundref',
+    description: 'Crossref Funder Registry (FundRef)',
+    active: true,
+    identifier_prefix: 'https://api.crossref.org/funders/',
+    # Only add the :for_orgs context
+    context: 2
+  },
+  {
+    name: 'ror',
+    description: 'Research Organization Registry (ROR)',
+    active: true,
+    identifier_prefix: 'https://ror.org/',
+    # Only add the :for_orgs context
+    context: 2
   }
 ]
 identifier_schemes.each { |is| IdentifierScheme.find_or_create_by(is) }
diff --git a/lib/tasks/orgs.rake b/lib/tasks/orgs.rake
new file mode 100644
index 0000000000..2cb883dd72
--- /dev/null
+++ b/lib/tasks/orgs.rake
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+
+namespace :orgs do
+  desc 'Updates DB and Creates CSV with Org-related ROR/Fundref data'
+  task update_ror_data: :environment do
+    # Existing ROR/Fundref data is left untouched unless the task is run with
+    # the environment variable UPDATE_EXISTING set to exactly `true`, e.g.
+    #   UPDATE_EXISTING=true bundle exec rake orgs:update_ror_data
+    Orgs::UpdateRorService.run(update_existing: ENV['UPDATE_EXISTING'] == 'true')
+  end
+end