Skip to content

Commit

Permalink
Merge branch '__rultor'
Browse files Browse the repository at this point in the history
  • Loading branch information
rultor committed Oct 2, 2024
2 parents 5a19919 + 1cfe989 commit ae2c64d
Show file tree
Hide file tree
Showing 7 changed files with 275 additions and 122 deletions.
53 changes: 51 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,62 @@ repositories those were created in the provided date range.
| Option | Required | Description |
|---------------|----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| `--query` || [GitHub Search API query] |
| `--start` || The start date to search the repositories, in [ISO] format; e.g. `2024-01-01` |
| `--end` || The end date to search the repositories, in [ISO] format; e.g. `2024-01-01` |
| `--graphql` || Path to GitHub API GraphQL query, default is `ghminer.graphql`. |
| `--schema` || Path to parsing schema, default is `ghminer.json`. |
| `--start` || The start date to search the repositories, in [ISO] format; e.g. `2024-01-01`. |
| `--end` || The end date to search the repositories, in [ISO] format; e.g. `2024-01-01`. |
| `--tokens` || Text file name that contains a number of [GitHub PATs]. Those will be used in order to pass GitHub API rate limits. Add as many tokens as needed, considering the amount of data (they should be separated by line break). |
| `--date` || The type of the date field to search on, you can choose from `created`, `updated` and `pushed`, the default one is `created`. |
| `--batchsize` || Request batch-size value in the range `10..100`. The default value is `10`. |
| `--filename` || The name of the file for the found repos (CSV and JSON files). The default one is `result`. |
| `--json` || Save found repos as JSON file too. |

### GraphQL Query

Your query, provided via `--graphql`, can include any
[GitHub supported fields][Gh Explorer] you want. However, so that ghminer can
keep running it to collect all possible repositories, the query must have
the following structure:

* `search` with `$searchQuery`, `$first`, `$after` attributes.
* `pageInfo` with `endCursor`, `hasNextPage` attributes.
* `repositoryCount` field.

Here is an example:

```graphql
query ($searchQuery: String!, $first: Int, $after: String) {
search(query: $searchQuery, type: REPOSITORY, first: $first, after: $after) {
repositoryCount
...
pageInfo {
endCursor
hasNextPage
}
}
}
```

### Parsing Schema

To parse the response generated by the [GraphQL Query](#graphql-query), you should
provide a parsing schema. This schema should have all desired metadata field
names as keys and the paths to the data in the response as values.

For instance:

```json
{
"repo": "nameWithOwner",
"branch": "defaultBranchRef.name",
"readme": "defaultBranchRef.target.repository.object.text",
"topics": "repositoryTopics.edges[].node.topic.name",
"lastCommitDate": "defaultBranchRef.target.history.edges[0].node.committedDate",
"commits": "defaultBranchRef.target.history.totalCount",
"workflows": "object.entries.length"
}
```

## How to contribute

Fork repository, make changes, send us a [pull request](https://www.yegor256.com/2014/04/15/github-guidelines.html).
Expand All @@ -78,3 +126,4 @@ You will need [Node 20+] installed.
[limitation]: https://stackoverflow.com/questions/37602893/github-search-limit-results
[Node 20+]: https://nodejs.org/en/download/package-manager
[blogpost]: https://h1alexbel.github.io/2024/05/24/ghminer.html
[Gh Explorer]: https://docs.github.com/en/graphql/overview/explorer
98 changes: 5 additions & 93 deletions src/graph.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,98 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
const graph = `query ($searchQuery: String!, $first: Int, $after: String) {
search(query: $searchQuery, type: REPOSITORY, first: $first, after: $after) {
repositoryCount
nodes {
... on Repository {
nameWithOwner
description
defaultBranchRef {
name
}
object(expression: "HEAD:.github/workflows/") {
... on Tree {
entries {
name
object {
... on Blob {
byteSize
}
}
}
}
}
primaryLanguage {
name
}
createdAt
refs(refPrefix: "refs/heads/") {
totalCount
}
defaultBranchRef {
name
target {
repository {
object(expression: "HEAD:README.md") {
... on Blob {
text
}
}
}
... on Commit {
history(first: 1) {
totalCount
edges {
node {
committedDate
}
}
}
}
}
}
mentionableUsers {
totalCount
}
latestRelease {
createdAt
}
releases(first:1) {
edges {
node {
id
}
}
totalCount
}
stargazerCount
forkCount
pullRequests {
totalCount
}
issues(states: [OPEN]) {
totalCount
}
licenseInfo {
spdxId
}
repositoryTopics(first: 10) {
edges {
node {
topic {
name
}
}
}
}
}
}
pageInfo {
endCursor
hasNextPage
}
}
}`;

const fs = require('fs');
/**
 * Load the text of a GraphQL query from a file.
 * The file is read synchronously and decoded as UTF-8.
 * @param {String} path Path to the file with the GraphQL query
 * @return {String} Content of the file
 */
const graph = (path) => fs.readFileSync(path, {encoding: 'utf-8'});

module.exports = graph;
38 changes: 11 additions & 27 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ const path = require('path');
const filed = require('./tokens.js');
const query = require('./graph.js');
const pkg = require('../package.json');
const fs = require('fs');
const nestedProp = require('./nested-prop.js');

console.log(`Running ghminer@${pkg.version}`);
const argv = minimist(process.argv.slice(2));
Expand All @@ -41,6 +43,8 @@ const startDate = argv.start || '2008-01-01';
const endDate = argv.end || now;
const dateType = argv.date || 'created';
const print = argv.json || false;
const gpath = argv.graphql || 'ghminer.graphql';
const schema = argv.schema || 'ghminer.json';

let tokens;
if (argv.tokens) {
Expand Down Expand Up @@ -96,7 +100,7 @@ async function fetchResultsBatch(
Authorization: `Bearer ${nextToken()}`
}
});
const data = await client.request(query, {
const data = await client.request(query(gpath), {
searchQuery,
first: batchsize,
after: cursor
Expand Down Expand Up @@ -222,38 +226,18 @@ async function fetchAllResults() {
}
}

// @todo #1:45min Add support for dynamic field mapping.
// Let's add support for dynamic field mapping, user will define his own
// mapping in `.yml` file, we will parse that and apply it when writing
// results to the files. e.g.: name: result.nameWithOwner, etc. In this
// case `result` should be bindable only in that `.yml` config.
/**
* Write results to files
* @param {Object} json Json objects
*/
function writeFiles(json) {
const format = JSON.parse(fs.readFileSync(schema, 'utf-8'));
const formattedResults = json.map((result) => {
const data = {
repo: result.nameWithOwner,
branch: result.defaultBranchRef.name,
readme: result.defaultBranchRef.target.repository.object? result.defaultBranchRef.target.repository.object.text : '',
description: result.description ? result.description : '',
topics: result.repositoryTopics.edges.map((edge) => edge.node.topic.name),
createdAt: result.createdAt,
lastCommitDate: result.defaultBranchRef.target.history.edges[0].node.committedDate,
lastReleaseDate: result.latestRelease ? result.latestRelease.createdAt : '',
releases: result.releases.totalCount,
contributors: result.mentionableUsers.totalCount,
pulls: result.pullRequests.totalCount,
commits: result.defaultBranchRef.target.history.totalCount,
issues: result.issues.totalCount,
forks: result.forkCount,
stars: result.stargazerCount,
branches: result.refs.totalCount,
workflows: result.object ? result.object.entries.length: 0,
license: result.licenseInfo ? result.licenseInfo.spdxId : '',
language: result.primaryLanguage ? result.primaryLanguage.name : '',
};
const data = {};
for (const [key, path] of Object.entries(format)) {
const value = nestedProp(result, path);
data[key] = value !== null && value !== undefined ? value : '';
}
return data;
});
toCsv(fileName, formattedResults);
Expand Down
48 changes: 48 additions & 0 deletions src/nested-prop.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2024 Aliaksei Bialiauski
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

const nestedProp = (obj, path) => {
return path.split('.').reduce((acc, key) => {
if (!acc) return null;
const match = key.match(/^([a-zA-Z_$][a-zA-Z_$0-9]*)\[(\d*)\]$/);
if (match) {
const arrayKey = match[1];
const index = match[2];
if (index) {
return acc[arrayKey] && Array.isArray(acc[arrayKey]) ? acc[arrayKey][parseInt(index, 10)] : null;
} else {
return acc[arrayKey] && Array.isArray(acc[arrayKey]) ? acc[arrayKey] : [];
}
}
if (Array.isArray(acc)) {
return acc.map((item) => item[key] !== undefined ? item[key] : null);
} else if (acc[key] !== undefined) {
return acc[key];
} else {
return null;
}
}, obj);
};

module.exports = nestedProp;
39 changes: 39 additions & 0 deletions test/graph.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2024 Aliaksei Bialiauski
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
const assert = require('assert');
const graph = require('../src/graph');

// Verifies that graph() returns the exact content of the query file.
describe('Test case for graph.js', function() {
  it('reads GraphQL query from file', function() {
    const expected = 'query ($searchQuery: String!, $first: Int, $after: String) {}\n';
    const found = graph('test/resources/query.graphql');
    const message = `found query ${found} does not match with expected ${expected}`;
    assert.equal(found, expected, message);
  });
});
Loading

0 comments on commit ae2c64d

Please sign in to comment.