From 6d6805972b34b28a2c2ff595b75d9362cfc1add7 Mon Sep 17 00:00:00 2001 From: Enoc Date: Sat, 19 Jun 2021 01:06:24 -0600 Subject: [PATCH 1/2] Move rails from FileScraper to URLscraper [initial steps] --- .../templates/pages/about_tmpl.coffee | 2 +- lib/docs/filters/rails/clean_html_guides.rb | 2 +- lib/docs/filters/rails/entries.rb | 8 +- lib/docs/scrapers/rails.rb | 128 ++++++++++++++++++ lib/docs/scrapers/rdoc/rails.rb | 119 ---------------- 5 files changed, 134 insertions(+), 125 deletions(-) create mode 100644 lib/docs/scrapers/rails.rb delete mode 100644 lib/docs/scrapers/rdoc/rails.rb diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee index d054f21a69..b202296404 100644 --- a/assets/javascripts/templates/pages/about_tmpl.coffee +++ b/assets/javascripts/templates/pages/about_tmpl.coffee @@ -738,7 +738,7 @@ credits = [ 'https://www.ruby-lang.org/en/about/license.txt' ], [ 'Ruby on Rails', - '2004-2020 David Heinemeier Hansson
Rails, Ruby on Rails, and the Rails logo are trademarks of David Heinemeier Hansson.', + '2004-2021 David Heinemeier Hansson
Rails, Ruby on Rails, and the Rails logo are trademarks of David Heinemeier Hansson.', 'MIT', 'https://raw.githubusercontent.com/rails/rails/master/activerecord/MIT-LICENSE' ], [ diff --git a/lib/docs/filters/rails/clean_html_guides.rb b/lib/docs/filters/rails/clean_html_guides.rb index a40dcba09e..1f81ed58c5 100644 --- a/lib/docs/filters/rails/clean_html_guides.rb +++ b/lib/docs/filters/rails/clean_html_guides.rb @@ -2,7 +2,7 @@ module Docs class Rails class CleanHtmlGuidesFilter < Filter def call - return doc unless slug.start_with?('guides') + return doc unless root_url.to_s.match?('guides') at_css('#mainCol').prepend_child at_css('#feature .wrapper').children @doc = at_css('#mainCol') diff --git a/lib/docs/filters/rails/entries.rb b/lib/docs/filters/rails/entries.rb index 0b29a4afbb..b83b4c878c 100644 --- a/lib/docs/filters/rails/entries.rb +++ b/lib/docs/filters/rails/entries.rb @@ -1,6 +1,6 @@ module Docs class Rails - class EntriesFilter < Docs::Rdoc::EntriesFilter + class EntriesFilter < Docs::EntriesFilter# Docs::Rdoc::EntriesFilter TYPE_BY_NAME_MATCHES = { /Assertions|::Test|Fixture/ => 'Testing', /\AActiveRecord.+mysql/i => 'ActiveRecord/MySQL', @@ -49,7 +49,7 @@ def get_name end def get_type - return 'Guides' if slug.start_with?('guides') + return 'Guides' if root_url.to_s.match?('guides') parent = at_css('.meta-parent').try(:content).to_s @@ -69,13 +69,13 @@ def get_type end def include_default_entry? - return true if slug.start_with?('guides') + return true if root_url.to_s.match?('guides') super && !skip? end def additional_entries - return [] if slug.start_with?('guides') + return [] if root_url.to_s.match?('guides') skip? ? [] : super end diff --git a/lib/docs/scrapers/rails.rb b/lib/docs/scrapers/rails.rb new file mode 100644 index 0000000000..8789a7d36b --- /dev/null +++ b/lib/docs/scrapers/rails.rb @@ -0,0 +1,128 @@ +module Docs + class Rails < UrlScraper + # include FixInternalUrlsBehavior + include MultipleBaseUrls + + self.name = 'Ruby on Rails' + self.type = 'rdoc' + self.slug = 'rails' + + self.links = { + home: 'http://rubyonrails.org/', + code: 'https://github.com/rails/rails' + } + + # html_filters.replace 'container', 'rails/container' + html_filters.push 'rails/entries', 'rdoc/clean_html', 'rails/clean_html_guides' + + options[:skip_rdoc_filters?] = ->(filter) { filter.root_url.to_s.match?('guides/') } + + options[:root_title] = 'Ruby on Rails' + + options[:skip] = [ + 'links.html', + 'index.html', + 'credits.html', + 'ruby_on_rails_guides_guidelines.html', + 'contributing_to_ruby_on_rails.html', + 'development_dependencies_install.html', + 'api_documentation_guidelines.html', + 'ActionController/Instrumentation.html', + 'ActionController/Rendering.html', + 'ActionDispatch/DebugExceptions.html', + 'ActionDispatch/Journey/Parser.html', + 'ActionDispatch/Reloader.html', + 'ActionDispatch/Routing/HtmlTableFormatter.html', + 'ActionDispatch/ShowExceptions.html', + 'ActionView/FixtureResolver.html', + 'ActionView/LogSubscriber.html', + 'ActionView/TestCase/Behavior/RenderedViewsCollection.html', + 'ActiveRecord/Tasks/DatabaseTasks.html', + 'ActiveSupport/Dependencies/WatchStack.html', + 'ActiveSupport/Notifications/Fanout.html', + 'ActionDispatch/www.example.com', + 'ActionDispatch/Http/www.rubyonrails.org', + 'ActionDispatch/Http/www.rubyonrails.co.uk', + '\'TZ\'', + 'active_record_migrations.html', + 'association_basics.html' + ] + + options[:skip_patterns] = [ + /history/i, + /rakefile/i, + /changelog/i, + /readme/i, + /news/i, + /license/i, + /release_notes/, + /\AActionController\/Testing/, + /\AActionView\/LookupContext/, + /\AActionView\/Resolver/, + /\AActiveSupport\/Multibyte\/Unicode\//, + /\AActiveSupport\/XML/i, + /\ASourceAnnotationExtractor/, + /\AI18n\/Railtie/, + /\AMinitest/, + /\ARails\/API/, + /\ARails\/AppBuilder/, + /\ARails\/PluginBuilder/, + /\ARails\/Generators\/Testing/ + ] + + options[:attribution] = ->(filter) do + if filter.slug.start_with?('guides') + <<-HTML + © 2004–2021 David Heinemeier Hansson
+ Licensed under the Creative Commons Attribution-ShareAlike 4.0 International License. + HTML + else + <<-HTML + © 2004–2021 David Heinemeier Hansson
+ Licensed under the MIT License. + HTML + end + end + + version '6.1' do + self.release = '6.1.3.2' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/' + ] + + options[:skip_patterns] << /v.*\..*\// + end + + version '6.0' do + self.release = '6.0.0' + end + + version '5.2' do + self.release = '5.2.2' + end + + version '5.1' do + self.release = '5.1.6' + end + + version '5.0' do + self.release = '5.0.7' + end + + version '4.2' do + self.release = '4.2.11' + end + + version '4.1' do + self.release = '4.1.16' + end + + def get_latest_version(opts) + doc = fetch_doc('https://rubyonrails.org/', opts) + doc.at_css('.version p a').content.scan(/\d\.\d*\.*\d*\.*\d*/)[0] + end + + end +end diff --git a/lib/docs/scrapers/rdoc/rails.rb b/lib/docs/scrapers/rdoc/rails.rb deleted file mode 100644 index 907deed3bf..0000000000 --- a/lib/docs/scrapers/rdoc/rails.rb +++ /dev/null @@ -1,119 +0,0 @@ -module Docs - class Rails < Rdoc - # Instructions: - # 1. Download a release at https://github.com/rails/rails/releases - # 2. Open "railties/lib/rails/api/task.rb" and comment out any code related to sdoc ("configure_sdoc") - # 3. Run "bundle install --without db && bundle exec rake rdoc" (in the Rails directory) - # 4. Run "cd guides && bundle exec rake guides:generate:html" - # 5. Copy the "guides/output" directory to "html/guides" - # 6. Copy the "html" directory to "docs/rails~[version]" - - include FixInternalUrlsBehavior - - self.name = 'Ruby on Rails' - self.slug = 'rails' - self.initial_paths = %w(guides/index.html) - self.links = { - home: 'http://rubyonrails.org/', - code: 'https://github.com/rails/rails' - } - - html_filters.replace 'rdoc/entries', 'rails/entries' - html_filters.push 'rails/clean_html_guides' - - options[:skip_rdoc_filters?] = ->(filter) { filter.slug.start_with?('guides/') } - - options[:root_title] = 'Ruby on Rails' - - options[:skip] += %w( - guides/credits.html - guides/ruby_on_rails_guides_guidelines.html - guides/contributing_to_ruby_on_rails.html - guides/development_dependencies_install.html - guides/api_documentation_guidelines.html - ActionController/Instrumentation.html - ActionController/Rendering.html - ActionDispatch/DebugExceptions.html - ActionDispatch/Journey/Parser.html - ActionDispatch/Reloader.html - ActionDispatch/Routing/HtmlTableFormatter.html - ActionDispatch/ShowExceptions.html - ActionView/FixtureResolver.html - ActionView/LogSubscriber.html - ActionView/TestCase/Behavior/RenderedViewsCollection.html - ActiveRecord/Tasks/DatabaseTasks.html - ActiveSupport/Dependencies/WatchStack.html - ActiveSupport/Notifications/Fanout.html) - - # False positives found by docs:generate - options[:skip].concat %w( - ActionDispatch/www.example.com - ActionDispatch/Http/www.rubyonrails.org - ActionDispatch/Http/www.rubyonrails.co.uk - 'TZ' - active_record_migrations.html - association_basics.html) - - options[:skip_patterns] += [ - /release_notes/, - /\AActionController\/Testing/, - /\AActionView\/LookupContext/, - /\AActionView\/Resolver/, - /\AActiveSupport\/Multibyte\/Unicode\//, - /\AActiveSupport\/XML/i, - /\ASourceAnnotationExtractor/, - /\AI18n\/Railtie/, - /\AMinitest/, - /\ARails\/API/, - /\ARails\/AppBuilder/, - /\ARails\/PluginBuilder/, - /\ARails\/Generators\/Testing/] - - options[:attribution] = ->(filter) do - if filter.slug.start_with?('guides') - <<-HTML - © 2004–2020 David Heinemeier Hansson
- Licensed under the Creative Commons Attribution-ShareAlike 4.0 International License. - HTML - else - <<-HTML - © 2004–2020 David Heinemeier Hansson
- Licensed under the MIT License. - HTML - end - end - - version '6.1' do - self.release = '6.1.1' - end - - version '6.0' do - self.release = '6.0.0' - end - - version '5.2' do - self.release = '5.2.2' - end - - version '5.1' do - self.release = '5.1.6' - end - - version '5.0' do - self.release = '5.0.7' - end - - version '4.2' do - self.release = '4.2.11' - end - - version '4.1' do - self.release = '4.1.16' - end - - def get_latest_version(opts) - doc = fetch_doc('https://rubyonrails.org/', opts) - doc.at_css('.version p a').content.scan(/\d\.\d*\.*\d*\.*\d*/)[0] - end - end -end From c38856c5285444d66d2aaef3ac2f0c88e7edff7c Mon Sep 17 00:00:00 2001 From: Enoc Date: Tue, 22 Jun 2021 22:29:44 -0600 Subject: [PATCH 2/2] Improve url scraping in rails --- assets/stylesheets/application.css.scss | 1 + assets/stylesheets/pages/_rails.scss | 9 ++ docs/file-scrapers.md | 1 - lib/docs/filters/rails/clean_html.rb | 67 +++++++++++++++ lib/docs/filters/rails/clean_html_guides.rb | 37 --------- lib/docs/filters/rails/entries.rb | 92 ++++++--------------- lib/docs/scrapers/rails.rb | 52 +++++++++--- 7 files changed, 140 insertions(+), 119 deletions(-) create mode 100644 assets/stylesheets/pages/_rails.scss create mode 100644 lib/docs/filters/rails/clean_html.rb delete mode 100644 lib/docs/filters/rails/clean_html_guides.rb diff --git a/assets/stylesheets/application.css.scss b/assets/stylesheets/application.css.scss index 0243afeb05..cc6c0291ca 100644 --- a/assets/stylesheets/application.css.scss +++ b/assets/stylesheets/application.css.scss @@ -97,6 +97,7 @@ 'pages/pygame', 'pages/python', 'pages/qt', + 'pages/rails', 'pages/ramda', 'pages/rdoc', 'pages/react_native', diff --git a/assets/stylesheets/pages/_rails.scss b/assets/stylesheets/pages/_rails.scss new file mode 100644 index 0000000000..acf3eae261 --- /dev/null +++ b/assets/stylesheets/pages/_rails.scss @@ -0,0 +1,9 @@ +._rails { + .title.method-title { + @extend %block-label, %label-blue; + } + + h2 { + @extend %block-heading; + } +} diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md index c25db04231..aa2dace08b 100644 --- a/docs/file-scrapers.md +++ b/docs/file-scrapers.md @@ -207,7 +207,6 @@ done ### Nokogiri ### Ruby / Minitest -### Ruby on Rails ### Ruby Download the tarball of Ruby from https://www.ruby-lang.org/en/downloads/, extract it, run `./configure && make html` in your terminal (while your are in the ruby directory) and move diff --git a/lib/docs/filters/rails/clean_html.rb b/lib/docs/filters/rails/clean_html.rb new file mode 100644 index 0000000000..8a6e4c6d78 --- /dev/null +++ b/lib/docs/filters/rails/clean_html.rb @@ -0,0 +1,67 @@ +module Docs + class Rails + class CleanHtmlFilter < Filter + def call + + if current_url.to_s.match?('guides') + css('img, textarea, button, .anchorlink').remove + + at_css('#mainCol').prepend_child at_css('#feature .wrapper').children + @doc = at_css('#mainCol') + + container = Nokogiri::XML::Node.new 'div', doc + container['class'] = '_rails' + container.children = doc.children + doc << container + + css('h2, h3, h4, h5, h6').each do |node| + node.name = node.name.sub(/\d/) { |i| i.to_i - 1 } + end + + doc.prepend_child at_css('h1') + + if version == '6.1' || version == '6.0' + css('pre').each do |node| + code = node.at_css('code') + language = code['class'][/highlight ?(\w+)/, 1] + node['data-language'] = language unless language == 'plain' + code.remove_attribute('class') + node.content = node.content.strip + end + end + + else + title = at_css('h2') + title.name = 'h1' + + @doc = at_css('#content') + @doc.prepend_child(title) + + css('table td').each do |node| + node.remove if node.content.empty? + end + + css('.permalink').remove + + css('.sectiontitle').each do |node| + node.name = 'h2' + end + + css('pre').each do |node| + node['data-language'] = 'ruby' + end + + # move 'source on github' to the end of the source code + css('.sourcecode').each do |node| + github_url = node.at_css('.github_url') + github_url.content = "Source on Github" + node.at_css('.source-link').content = 'Source:' + node.at_css('.dyn-source').after(github_url) + end + end + + doc + end + end + end +end diff --git a/lib/docs/filters/rails/clean_html_guides.rb b/lib/docs/filters/rails/clean_html_guides.rb deleted file mode 100644 index 1f81ed58c5..0000000000 --- a/lib/docs/filters/rails/clean_html_guides.rb +++ /dev/null @@ -1,37 +0,0 @@ -module Docs - class Rails - class CleanHtmlGuidesFilter < Filter - def call - return doc unless root_url.to_s.match?('guides') - - at_css('#mainCol').prepend_child at_css('#feature .wrapper').children - @doc = at_css('#mainCol') - - container = Nokogiri::XML::Node.new 'div', doc - container['class'] = '_simple' - container.children = doc.children - doc << container - - css('h2, h3, h4, h5, h6').each do |node| - node.name = node.name.sub(/\d/) { |i| i.to_i - 1 } - end - - doc.prepend_child at_css('h1') - - css('#subCol', '.code_container').each do |node| - node.before(node.children).remove - end - - css('pre').each do |node| - code = node.at_css('code') - language = code['class'][/highlight ?(\w+)/, 1] - node['data-language'] = language unless language == 'plain' - code.remove_attribute('class') - node.content = node.content.strip - end - - doc - end - end - end -end diff --git a/lib/docs/filters/rails/entries.rb b/lib/docs/filters/rails/entries.rb index b83b4c878c..e48fe34905 100644 --- a/lib/docs/filters/rails/entries.rb +++ b/lib/docs/filters/rails/entries.rb @@ -1,88 +1,44 @@ module Docs class Rails class EntriesFilter < Docs::EntriesFilter# Docs::Rdoc::EntriesFilter - TYPE_BY_NAME_MATCHES = { - /Assertions|::Test|Fixture/ => 'Testing', - /\AActiveRecord.+mysql/i => 'ActiveRecord/MySQL', - /\AActiveRecord.+postgresql/i => 'ActiveRecord/PostgreSQL', - /\AActiveRecord.+sqlite/i => 'ActiveRecord/SQLite', - /\AActiveRecord.+Assoc/ => 'ActiveRecord/Associations', - /\AActiveRecord.+Attribute/ => 'ActiveRecord/Attributes', - /\AActiveRecord.+ConnectionAdapters/ => 'ActiveRecord/Connection', - /\AActiveSupport.+(Subscriber|Notifications)/ => 'ActiveSupport/Instrumentation' } - - TYPE_BY_NAME_STARTS_WITH = { - 'ActionController::Parameters' => 'ActionController/Parameters', - 'ActionDispatch::Integration' => 'Testing', - 'ActionDispatch::Request' => 'ActionDispatch/Request', - 'ActionDispatch::Response' => 'ActionDispatch/Response', - 'ActionDispatch::Routing' => 'ActionDispatch/Routing', - 'ActionView::Helpers' => 'ActionView/Helpers', - 'ActiveModel::Errors' => 'ActiveModel/Validation', - 'ActiveModel::Valid' => 'ActiveModel/Validation', - 'ActiveRecord::Batches' => 'ActiveRecord/Query', - 'ActiveRecord::Calculations' => 'ActiveRecord/Query', - 'ActiveRecord::Connection' => 'ActiveRecord/Connection', - 'ActiveRecord::FinderMethods' => 'ActiveRecord/Query', - 'ActiveRecord::Migra' => 'ActiveRecord/Migration', - 'ActiveRecord::Query' => 'ActiveRecord/Query', - 'ActiveRecord::Relation' => 'ActiveRecord/Relation', - 'ActiveRecord::Result' => 'ActiveRecord/Connection', - 'ActiveRecord::Scoping' => 'ActiveRecord/Query', - 'ActiveRecord::SpawnMethods' => 'ActiveRecord/Query', - 'ActiveSupport::Cach' => 'ActiveSupport/Caching', - 'ActiveSupport::Inflector' => 'ActiveSupport/Inflector', - 'ActiveSupport::Time' => 'ActiveSupport/TimeZones', - 'Rails::Application' => 'Rails/Application', - 'Rails::Engine' => 'Rails/Engine', - 'Rails::Generators' => 'Rails/Generators', - 'Rails::Railtie' => 'Rails/Railtie' } - def get_name - if slug.start_with?('guides') - name = at_css('#feature h2').content.strip - name.remove! %r{\s\(.+\)\z} - return name + if current_url.to_s.match?('guides') + at_css('h2').content + else + name = at_css('h2').to_html.scan(/<\/span>.*?", '') + name.sub!('<', '') + end + + name.strip end - - super end def get_type - return 'Guides' if root_url.to_s.match?('guides') - - parent = at_css('.meta-parent').try(:content).to_s - - if [name, parent].any? { |str| str.end_with?('Error') || str.end_with?('Exception') } - return 'Errors' - end - - TYPE_BY_NAME_MATCHES.each_pair do |key, value| - return value if name =~ key - end + return 'Guides' if current_url.to_s.match?('guides') + return 'Ruby files' if name =~ /.rb/ - TYPE_BY_NAME_STARTS_WITH.each_pair do |key, value| - return value if name.start_with?(key) - end + name.split('::')[0] - super end - def include_default_entry? - return true if root_url.to_s.match?('guides') + def additional_entries + return [] if current_url.to_s.match?('guides') - super && !skip? - end + entries = [] - def additional_entries - return [] if root_url.to_s.match?('guides') + css('.title.method-title').each do |node| + entry_name = node.at_css('b').content + entries << [name+"##{entry_name}", node['id']] + end - skip? ? [] : super + entries end - def skip? - @skip ||= !css('p').any? { |node| node.content.present? } - end end end end diff --git a/lib/docs/scrapers/rails.rb b/lib/docs/scrapers/rails.rb index 8789a7d36b..56c55bb018 100644 --- a/lib/docs/scrapers/rails.rb +++ b/lib/docs/scrapers/rails.rb @@ -1,10 +1,9 @@ module Docs class Rails < UrlScraper - # include FixInternalUrlsBehavior include MultipleBaseUrls self.name = 'Ruby on Rails' - self.type = 'rdoc' + self.type = 'rails' self.slug = 'rails' self.links = { @@ -12,10 +11,7 @@ class Rails < UrlScraper code: 'https://github.com/rails/rails' } - # html_filters.replace 'container', 'rails/container' - html_filters.push 'rails/entries', 'rdoc/clean_html', 'rails/clean_html_guides' - - options[:skip_rdoc_filters?] = ->(filter) { filter.root_url.to_s.match?('guides/') } + html_filters.push 'rails/entries', 'rails/clean_html' options[:root_title] = 'Ruby on Rails' @@ -88,35 +84,65 @@ class Rails < UrlScraper self.release = '6.1.3.2' self.base_urls = [ - 'https://api.rubyonrails.org/', - 'https://guides.rubyonrails.org/' + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/' ] options[:skip_patterns] << /v.*\..*\// end version '6.0' do - self.release = '6.0.0' + self.release = '6.1.3.2' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/' + ] end version '5.2' do - self.release = '5.2.2' + self.release = '5.2.5' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/v5.2/' + ] end version '5.1' do - self.release = '5.1.6' + self.release = '5.1.7' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/v5.1/' + ] end version '5.0' do - self.release = '5.0.7' + self.release = '5.0.7.2' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/v5.0/' + ] end version '4.2' do - self.release = '4.2.11' + self.release = '4.2.11.3' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/v4.2/' + ] end version '4.1' do self.release = '4.1.16' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/v4.1/' + ] end def get_latest_version(opts)