Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion app/services/external_apis/ror_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,13 @@ def search(term:, filters: [])
def query_ror(term:, page: 1, filters: [])
return [] unless term.present?

# Percent-encode the term
# (HTTParty.get() throws InvalidURIError when given non-ASCII characters)
encoded_term = URI.encode_www_form_component(term)

# build the URL
target = "#{api_base_url}#{search_path}"
query = query_string(term: term, page: page, filters: filters)
query = query_string(term: encoded_term, page: page, filters: filters)

# Call the ROR API and log any errors
resp = http_get(uri: "#{target}?#{query}", additional_headers: {},
Expand Down
133 changes: 133 additions & 0 deletions app/services/orgs/update_ror_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# frozen_string_literal: true

module Orgs
  # Service object that updates Org records with ROR and FundRef identifiers, and writes results to CSV
  # so that the matches can be reviewed afterwards.
  # Invoked by the `orgs:update_ror_data` Rake task (lib/tasks/orgs.rake)
  module UpdateRorService
    extend self

    # Location of the generated report (overwritten on every run)
    CSV_FILE_PATH = Rails.root.join('tmp', 'ror_fundref_ids.csv')
    # Column order of every row written to the report
    CSV_HEADERS = %w[org_id org_name ror_name ror_id fundref_id weight].freeze

    # Entry point: searches ROR for every Org in scope, stores the matched identifiers and
    # writes one CSV row per processed Org.
    #
    # @param update_existing [Boolean] when false (default), Orgs that already have a ROR
    #   identifier are skipped; when true they are searched and updated again
    # @return [nil] when the identifier schemes are missing or the ROR heartbeat check fails,
    #   otherwise the value of the CSV.open block
    def run(update_existing: false)
      ror, fundref = fetch_identifier_schemes
      # Only proceed if the identifier schemes and the ROR API are all available
      return unless ror && fundref && ror_service_available?

      print_intro_message

      CSV.open(CSV_FILE_PATH, 'w', write_headers: true, headers: CSV_HEADERS) do |csv|
        org_scope.each { |org| process_org(org, ror, fundref, csv, update_existing: update_existing) }
      end
    end

    private

    # Searches ROR for a single Org and records the outcome (match or no match).
    #
    # @param org the Org record to process
    # @param ror the 'ror' IdentifierScheme
    # @param fundref the 'fundref' IdentifierScheme
    # @param csv the open CSV the result row is appended to
    # @param update_existing [Boolean] see #run
    def process_org(org, ror, fundref, csv, update_existing: false)
      # If the Org already has a ROR identifier, skip it
      return if !update_existing && org_has_ror_identifier?(org, ror)

      # Treat a nil response from the search service the same as "no results"
      results = ror_search_results_for_org(org) || []
      result = best_match_from_results(results)
      if result.present?
        handle_matched_result(org, ror, fundref, result, csv)
      else
        handle_unmatched_result(org, csv)
      end
    end

    # Looks up the 'ror' and 'fundref' IdentifierSchemes and prints a message when either is missing.
    #
    # @return [Array] `[ror, fundref]`; either entry is nil when the scheme does not exist
    def fetch_identifier_schemes
      ror = IdentifierScheme.find_by(name: 'ror')
      fundref = IdentifierScheme.find_by(name: 'fundref')

      if ror.nil? || fundref.nil?
        puts "Missing IdentifierScheme(s): ror: #{ror.inspect}, fundref: #{fundref.inspect}"
        puts 'Both must exist in DB for this task to run.'
      end
      [ror, fundref]
    end

    # Runs the ROR heartbeat check and prints a hint when it fails.
    #
    # @return the (truthy/falsy) result of ExternalApis::RorService.ping
    def ror_service_available?
      ok = ExternalApis::RorService.ping
      unless ok
        puts 'ROR appears to be offline or your configuration is invalid. ' \
             'Heartbeat check failed. Refer to the log for more information.'
      end
      ok
    end

    # @return [Boolean] true when one of the Org's identifiers belongs to the given ROR scheme
    def org_has_ror_identifier?(org, ror)
      org.identifiers.any? { |id| id.identifier_scheme_id == ror.id }
    end

    # Prints the explanation of what the task does and where the CSV report is written.
    def print_intro_message
      puts <<~MSG
        Scanning ROR for each of your existing Orgs.
        The results will be written to "#{CSV_FILE_PATH}" to facilitate#{' '}
        review and any corrections that may need to be made.
        The CSV file contains the Org name stored in your DB next to the ROR org#{' '}
        name that was matched. Use these 2 values to determine if the match was valid.
        You can use the ROR search page to find the correct match for any organizations#{' '}
        that need to be corrected: https://ror.org/search

      MSG
    end

    # Builds the set of Orgs to process (managed, not the "other" Org, ordered by name) with
    # their identifiers eager-loaded, and prints how many were found.
    #
    # @return the Org relation
    def org_scope
      scope = Org.includes(identifiers: :identifier_scheme)
                 .where(managed: true, is_other: false)
                 .order(:name)
      puts "Found #{scope.size} org(s) to process."
      scope
    end

    # Runs the external org search using the Org's name.
    #
    # @return the results of OrgSelection::SearchService.search_externally
    def ror_search_results_for_org(org)
      # The abbreviation sometimes causes weird results so strip it off in this instance
      org_name = org.name.gsub(" (#{org.abbreviation})", '')
      OrgSelection::SearchService.search_externally(search_term: org_name)
    end

    # Picks the best match: the first result with weight 0, otherwise the first with weight 1.
    # Results with any other (or no) weight are never selected.
    # (See OrgSelection::SearchService#weigh for how weight is calculated.)
    #
    # @param results [Array<Hash>] search results, each expected to carry a :weight
    # @return [Hash, nil] the chosen result, or nil when none qualifies
    def best_match_from_results(results)
      # `&.` keeps a result without a :weight from raising NoMethodError
      results.find { |r| r[:weight]&.zero? } || results.find { |r| r[:weight] == 1 }
    end

    # Reports an Org for which no acceptable match was found and writes a row with empty
    # match columns.
    def handle_unmatched_result(org, csv)
      puts "⚠️ No results found for Org with id: #{org.id} and name: #{org.name}"
      csv << [org.id, org.name, nil, nil, nil, nil]
    end

    # Stores the identifiers of the matched result and appends the corresponding CSV row.
    #
    # @param result [Hash] the chosen search result (:name, :ror, :fundref, :weight are read)
    def handle_matched_result(org, ror, fundref, result, csv)
      unless result[:ror].present? || result[:fundref].present?
        # Nothing to store, but still record the Org so it does not silently vanish from the report
        puts "⚠️ Match without ROR/FUNDREF id for Org with id: #{org.id} and name: #{org.name}"
        csv << [org.id, org.name, result[:name], nil, nil, result[:weight]]
        return
      end

      # Save ROR and FUNDREF entries to DB
      identifiers = handle_identifiers(org, ror, fundref, result)
      # Add entry to generated CSV
      # NOTE(review): when saving an identifier failed, the row below still shows the value
      # that was attempted (the unsaved in-memory value) - confirm this is the intended report.
      csv << [org.id, org.name, result[:name],
              identifiers[:ror]&.value, identifiers[:fundref]&.value, result[:weight]]
    end

    # Creates/updates both identifiers for the Org.
    #
    # @return [Hash] `{ ror: Identifier or nil, fundref: Identifier or nil }`
    def handle_identifiers(org, ror, fundref, result)
      {
        ror: handle_identifier(org, ror, result[:ror], result[:name], 'ROR'),
        fundref: handle_identifier(org, fundref, result[:fundref], result[:name], 'FUNDREF')
      }
    end

    # Finds or initializes the Org's Identifier for the given scheme and sets its value to the
    # scheme's identifier_prefix followed by the id. A failure is printed and logged instead of
    # raised, so a single bad record does not abort the whole run.
    #
    # @param identifier_scheme the IdentifierScheme the identifier belongs to
    # @param id the raw identifier taken from the search result
    # @param name the matched organization name (only used in the console output)
    # @param label [String] scheme label used in the console output
    # @return the Identifier (even when saving failed), or nil when id is blank
    def handle_identifier(org, identifier_scheme, id, name, label)
      return unless id.present?

      identifier = Identifier.find_or_initialize_by(identifiable: org,
                                                    identifier_scheme: identifier_scheme)
      begin
        identifier.update!(value: "#{identifier_scheme.identifier_prefix}#{id}")
        puts "✅ Updated #{org.name} -> #{label}: #{identifier.value}, #{name}"
      rescue StandardError => e
        message = "❌ Failed to update #{org.name} -> #{label}: #{e.message}"
        puts message
        Rails.logger.error(message)
      end
      identifier
    end
  end
end
4 changes: 2 additions & 2 deletions app/views/orgs/_external_identifiers.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
<div class="form-control mb-3 col-xs-10">
<% id = presenter.id_for_scheme(scheme: scheme) %>
<span class="bold"><%= scheme.description %>:</span>
<%= id_for_display(id: id) %>
<%= id_for_display(id: id, with_scheme_name: false) %>
</div>
</div>
<% end %>
Expand Down Expand Up @@ -62,7 +62,7 @@
<div class="form-control mb-3 col-xs-10">
<% id = presenter.id_for_scheme(scheme: scheme) %>
<span class="bold"><%= scheme.description %>:</span>
<%= id_for_display(id: id) %>
<%= id_for_display(id: id, with_scheme_name: false) %>
</div>
</div>
<% end %>
Expand Down
2 changes: 1 addition & 1 deletion config/initializers/external_apis/ror.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# the API and to verify that your configuration settings are correct,
# please refer to: https://github.com/ror-community/ror-api
Rails.configuration.x.ror.landing_page_url = 'https://ror.org/'
Rails.configuration.x.ror.api_base_url = 'https://api.ror.org/'
Rails.configuration.x.ror.api_base_url = 'https://api.ror.org/v1/'
Rails.configuration.x.ror.heartbeat_path = 'heartbeat'
Rails.configuration.x.ror.search_path = 'organizations'
Rails.configuration.x.ror.max_pages = 2
Expand Down
16 changes: 16 additions & 0 deletions db/seeds.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,22 @@
description: 'Your institutional credentials',
active: true,
context: 11
},
{
name: 'fundref',
description: 'Crossref Funder Registry (FundRef)',
active: true,
identifier_prefix: 'https://api.crossref.org/funders/',
# Only add the :for_orgs context
context: 2
},
{
name: 'ror',
description: 'Research Organization Registry (ROR)',
active: true,
identifier_prefix: 'https://ror.org/',
# Only add the :for_orgs context
context: 2
}
]
identifier_schemes.each { |is| IdentifierScheme.find_or_create_by(is) }
Expand Down
12 changes: 12 additions & 0 deletions lib/tasks/orgs.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# frozen_string_literal: true

namespace :orgs do
  desc 'Updates DB and Creates CSV with Org-related ROR/Fundref data'
  task update_ror_data: :environment do
    # By default, existing ROR/Fundref data is not updated.
    # - To update existing data, prepend `UPDATE_EXISTING=true`
    # - (e.g. `UPDATE_EXISTING=true bundle exec rake orgs:update_ror_data`)
    # The value is compared case-insensitively and surrounding whitespace is ignored, so
    # `TRUE` or `True` are not silently treated as "false". Any other value (or no value)
    # leaves existing data untouched.
    update_existing = ENV.fetch('UPDATE_EXISTING', '').strip.casecmp?('true')
    Orgs::UpdateRorService.run(update_existing: update_existing)
  end
end
Loading