Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
/pkg/
/spec/reports/
/tmp/
/.vscode/

# rspec failure tracking
.rspec_status

/spec/fixtures/ISO-IEC_2382
/spec/fixtures/Characters-Glossarist
/spec/fixtures/Figures
19 changes: 19 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Auto-generated by Cimas: Do not edit it manually!
# See https://github.com/metanorma/cimas
inherit_from:
- https://raw.githubusercontent.com/riboseinc/oss-guides/main/ci/rubocop.yml
- .rubocop_todo.yml

inherit_mode:
merge:
- Exclude

# local repo-specific modifications
# ...
plugins:
- rubocop-rspec
- rubocop-performance
- rubocop-rake

AllCops:
TargetRubyVersion: 3.0
189 changes: 189 additions & 0 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# This configuration was generated by
# `rubocop --auto-gen-config`
# on 2026-03-24 03:26:08 UTC using RuboCop version 1.86.0.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
# versions of RuboCop, may require this file to be generated again.

# Offense count: 1
# This cop supports safe autocorrection (--autocorrect).
Gemspec/RequireMFA:
Exclude:
- 'termium.gemspec'

# Offense count: 1
Gemspec/RequiredRubyVersion:
Exclude:
- 'termium.gemspec'

# Offense count: 1
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: IndentationWidth.
Layout/AssignmentIndentation:
Exclude:
- 'lib/termium/cli.rb'

# Offense count: 1
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: EnforcedStyle.
# SupportedStyles: empty_lines, no_empty_lines
Layout/EmptyLinesAroundBlockBody:
Exclude:
- 'spec/termium_spec.rb'

# Offense count: 1
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: EnforcedStyle.
# SupportedStyles: empty_lines, empty_lines_except_namespace, empty_lines_special, no_empty_lines
Layout/EmptyLinesAroundModuleBody:
Exclude:
- 'lib/termium.rb'

# Offense count: 1
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
# SupportedHashRocketStyles: key, separator, table
# SupportedColonStyles: key, separator, table
# SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
Layout/HashAlignment:
Exclude:
- 'lib/termium/cli.rb'

# Offense count: 1
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: EnforcedStyle.
# SupportedStyles: normal, indented_internal_methods
Layout/IndentationConsistency:
Exclude:
- 'termium.gemspec'

# Offense count: 1
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: AllowDoxygenCommentStyle, AllowGemfileRubyComment, AllowRBSInlineAnnotation, AllowSteepAnnotation.
Layout/LeadingCommentSpace:
Exclude:
- 'Rakefile'

# Offense count: 15
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
# URISchemes: http, https
Layout/LineLength:
Exclude:
- 'lib/termium/cli.rb'
- 'lib/termium/core.rb'
- 'lib/termium/language_module.rb'
- 'lib/termium/subject.rb'
- 'lib/termium/textual_support.rb'
- 'spec/termium_spec.rb'
- 'termium.gemspec'

# Offense count: 1
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: EnforcedStyle.
# SupportedStyles: final_newline, final_blank_line
Layout/TrailingEmptyLines:
Exclude:
- 'lib/termium.rb'

# Offense count: 2
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: AllowInHeredoc.
Layout/TrailingWhitespace:
Exclude:
- 'lib/termium/cli.rb'

# Offense count: 3
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
Metrics/AbcSize:
Exclude:
- 'lib/termium/cli.rb'
- 'lib/termium/core.rb'
- 'lib/termium/entry_term.rb'

# Offense count: 4
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
Metrics/MethodLength:
Max: 23

# Offense count: 2
# Configuration parameters: Mode, AllowedMethods, AllowedPatterns, AllowBangMethods, WaywardPredicates.
# AllowedMethods: call
# WaywardPredicates: infinite?, nonzero?
Naming/PredicateMethod:
Exclude:
- 'lib/termium/abbreviation.rb'
- 'lib/termium/entry_term.rb'

# Offense count: 4
# Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros, UseSorbetSigs.
# NamePrefix: is_, has_, have_, does_
# ForbiddenPrefixes: is_, has_, have_, does_
# AllowedMethods: is_a?
# MethodDefinitionMacros: define_method, define_singleton_method
Naming/PredicatePrefix:
Exclude:
- 'lib/termium/textual_support.rb'

# Offense count: 4
# This cop supports safe autocorrection (--autocorrect).
RSpec/EmptyLineAfterFinalLet:
Exclude:
- 'spec/termium/extract_spec.rb'
- 'spec/termium_spec.rb'

# Offense count: 1
# Configuration parameters: CountAsOne.
RSpec/ExampleLength:
Max: 8

# Offense count: 1
# Configuration parameters: AllowedPatterns.
# AllowedPatterns: ^expect_, ^assert_
RSpec/NoExpectationExample:
Exclude:
- 'spec/termium_spec.rb'

# Offense count: 2
# This cop supports unsafe autocorrection (--autocorrect-all).
Security/IoMethods:
Exclude:
- 'lib/termium/cli.rb'
- 'spec/termium_spec.rb'

# Offense count: 1
Style/ItAssignment:
Exclude:
- 'spec/spec_helper.rb'

# Offense count: 4
# This cop supports safe autocorrection (--autocorrect).
Style/RedundantFreeze:
Exclude:
- 'lib/termium/source.rb'
- 'lib/termium/textual_support.rb'

# Offense count: 2
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
# SupportedStyles: single_quotes, double_quotes
Style/StringLiterals:
Exclude:
- 'termium.gemspec'

# Offense count: 1
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: EnforcedStyleForMultiline.
# SupportedStylesForMultiline: comma, consistent_comma, diff_comma, no_comma
Style/TrailingCommaInArguments:
Exclude:
- 'spec/support/shared_examples.rb'

# Offense count: 2
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: EnforcedStyleForMultiline.
# SupportedStylesForMultiline: comma, consistent_comma, diff_comma, no_comma
Style/TrailingCommaInHashLiteral:
Exclude:
- 'lib/termium/designation_operations.rb'
13 changes: 7 additions & 6 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ source "https://rubygems.org"

gemspec

gem "equivalent-xml"
gem "pry"
gem "canon"
gem "lutaml-model", github: "lutaml/lutaml-model", ref: "main"
gem "rake", "~> 13.0"
gem "rspec", "~> 3.11"
gem "rubocop", "~> 1.58"
gem "rubocop-performance", "~> 1.21", ">= 1.21.1"
gem "xml-c14n"
gem "rspec"
gem "rubocop"
gem "rubocop-performance"
gem "rubocop-rake"
gem "rubocop-rspec"
8 changes: 4 additions & 4 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,11 @@ The resulting dataset will look like this:
----
{OUTPUT_PATH}/
├── concepts/
   ├── {CONCEPT_ID}.yaml
   ├── ...
├── {CONCEPT_ID}.yaml
├── ...
├── localized_concepts/
   ├── {LOCALIZED_CONCEPT_ID}.yaml
   ├── ...
├── {LOCALIZED_CONCEPT_ID}.yaml
├── ...
----

==== Usage
Expand Down
2 changes: 1 addition & 1 deletion Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ RSpec::Core::RakeTask.new(:spec)
# require "rubocop/rake_task"
# RuboCop::RakeTask.new

task default: %i[spec] #rubocop
task default: %i[spec] # rubocop
38 changes: 16 additions & 22 deletions lib/termium.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,22 @@

require "glossarist"

require "lutaml/model"
require "lutaml/model/xml_adapter/nokogiri_adapter"

Lutaml::Model::Config.configure do |config|
config.xml_adapter = Lutaml::Model::XmlAdapter::NokogiriAdapter
end

module Termium
class Error < StandardError; end
end

require_relative "termium/version"
require_relative "termium/extract"
require_relative "termium/extract_language"
require_relative "termium/core"
require_relative "termium/abbreviation"
require_relative "termium/designation_operations"
require_relative "termium/entry_term"
require_relative "termium/language_module"
require_relative "termium/parameter"
require_relative "termium/source"
require_relative "termium/source_ref"
require_relative "termium/subject"
require_relative "termium/textual_support"
require_relative "termium/universal_entry"
autoload :Version, "termium/version"
autoload :Extract, "termium/extract"
autoload :ExtractLanguage, "termium/extract_language"
autoload :Core, "termium/core"
autoload :Abbreviation, "termium/abbreviation"
autoload :DesignationOperations, "termium/designation_operations"
autoload :EntryTerm, "termium/entry_term"
autoload :LanguageModule, "termium/language_module"
autoload :Parameter, "termium/parameter"
autoload :Namespace, "termium/namespace"
autoload :Source, "termium/source"
autoload :SourceRef, "termium/source_ref"
autoload :Subject, "termium/subject"
autoload :TextualSupport, "termium/textual_support"
autoload :UniversalEntry, "termium/universal_entry"
end
6 changes: 1 addition & 5 deletions lib/termium/abbreviation.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
# frozen_string_literal: true

require_relative "source_ref"
require_relative "parameter"
require_relative "designation_operations"

module Termium
# For <abbreviation>
class Abbreviation < Lutaml::Model::Serializable
Expand All @@ -20,7 +16,7 @@ class Abbreviation < Lutaml::Model::Serializable
# <parameter abbreviation="NORM" />

xml do
root "abbreviation"
element "abbreviation"
map_attribute "order", to: :order
map_attribute "value", to: :value
map_element "sourceRef", to: :source_ref
Expand Down
8 changes: 5 additions & 3 deletions lib/termium/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ module Termium
class Cli < Thor
desc "convert", "Convert TERMIUM entries into a Glossarist dataset"

option :input_file, aliases: :i, required: true, desc: "Path to TERMIUM Plus XML extract"
option :input_file, aliases: :i, required: true,
desc: "Path to TERMIUM Plus XML extract"
option :output_file, aliases: :o, desc: "Output file path"
option :date_accepted, desc: "Date of acceptance for the dataset"

Expand Down Expand Up @@ -46,14 +47,15 @@ def convert
input_path = input_file_as_path(options[:input_file])

puts "Reading TERMIUM export file: #{input_path.relative_path_from(Dir.pwd)}"
termium_extract = Termium::Extract.from_xml(IO.read(input_path.expand_path))
termium_extract = Termium::Extract.from_xml(File.read(input_path.expand_path))

puts "Size of TERMIUM dataset: #{termium_extract.core.size}"

puts "Converting to Glossarist..."
convert_options = {}
if options[:date_accepted]
convert_options[:date_accepted] = Date.parse(options[:date_accepted]).iso8601
convert_options[:date_accepted] =
Date.parse(options[:date_accepted]).iso8601
end
glossarist_col = termium_extract.to_concept(convert_options)
# pp glossarist_col.first
Expand Down
8 changes: 3 additions & 5 deletions lib/termium/core.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
# frozen_string_literal: true

require "uuidtools"
require_relative "language_module"
require_relative "subject"
require_relative "universal_entry"
require_relative "source"

module Termium
# For <core>
Expand All @@ -18,7 +14,9 @@ class Core < Lutaml::Model::Serializable
attribute :source, Source, collection: true

xml do
root "core"
element "core"
ordered

map_attribute "disseminationLevel", to: :dissemination_level
map_attribute "identificationNumber", to: :identification_number
map_element "languageModule", to: :language_module
Expand Down
4 changes: 2 additions & 2 deletions lib/termium/designation_operations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module DesignationOperations
PART_OF_SPEECH_CODE_MAPPING = {
"ADJ" => "adj",
"N" => "noun",
"V" => "verb"
"V" => "verb",
}.freeze
def part_of_speech
value = parameter.detect do |x|
Expand All @@ -19,7 +19,7 @@ def part_of_speech
GENDER_CODE_MAPPING = {
"F" => "f",
"M" => "m",
"EPI" => "c" # this means "Epicene"
"EPI" => "c", # this means "Epicene"
}.freeze
def gender
value = parameter.detect do |x|
Expand Down
Loading
Loading