[#810, github-backed example][xl]: improve looks, improve README, rename from simple-example to github-backed (#864)

This commit is contained in:
João Demenech
2023-05-09 19:19:36 -03:00
committed by GitHub
parent 714faf9986
commit 3f350f8fcd
24 changed files with 258 additions and 518 deletions

View File

@@ -0,0 +1,32 @@
{
"extends": [
"next",
"next/core-web-vitals"
],
"ignorePatterns": ["!**/*", ".next/**/*"],
"overrides": [
{
"files": ["*.ts", "*.tsx", "*.js", "*.jsx"],
"rules": {
"@next/next/no-html-link-for-pages": [
"error",
"examples/simple-example/pages"
]
}
},
{
"files": ["*.ts", "*.tsx"],
"rules": {}
},
{
"files": ["*.js", "*.jsx"],
"rules": {}
}
],
"rules": {
"@next/next/no-html-link-for-pages": "off"
},
"env": {
"jest": true
}
}

View File

@@ -0,0 +1,102 @@
# A data catalog with data on GitHub
This example showcases a simple data catalog that get its data from a list of GitHub repos that serve as datasets.
A `datasets.json` file is used to specify which datasets are going to be part of the data catalog.
The application contains an index page, which lists all the datasets specified in the `datasets.json` file, and users can see more information about each dataset, such as the list of data files in it and the README, by clicking the "info" button on the list.
You can read more about it on the [Data catalog with data on GitHub](https://portaljs.org/docs/examples/github-backed-catalog) blog post.
## Demo
https://example.portaljs.org/
## Deploy your own
[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Fdatopian%2Fportaljs%2Ftree%2Fmain%2Fexamples%2Fgithub-backed-catalog)
By clicking on this button, you will be redirected to a page which will allow you to clone the content into your own GitHub/GitLab/Bitbucket account and automatically deploy everything.
## How to use
### Install
Execute `create-next-app` to bootstrap the example:
```
npx create-next-app <app-name> --example https://github.com/datopian/portaljs/tree/main/examples/github-backed-catalog
cd <app-name>
```
### Set environment variables
This project uses the GitHub API, which for anonymous users will cap at 50 requests per hour, so you might want to get a [Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) and add it to a `.env` file inside the folder like so
```
GITHUB_PAT=<github token>
```
### Change datasets
You can change the datasets that will be displayed in the data catalog by editing the file `datasets.json`. Some examples can be found inside [this repo](https://github.com/datasets).
### Run in development mode
Run the app using:
```
npm run dev
```
Open http://localhost:3000 from your browser. You should see something similar to this:
![](https://i.imgur.com/jAljJ9C.png)
If click on the `info` button for a dataset you will see a page similar to this:
![](https://i.imgur.com/AoJd4O0.png)
## Notes
### Structure of `datasets.json`
The `datasets.json` file is simply a list of datasets, below you can see a minimal example of a dataset:
```json
{
"owner": "fivethirtyeight",
"repo": "data",
"branch": "master",
"files": ["nba-raptor/historical_RAPTOR_by_player.csv", "nba-raptor/historical_RAPTOR_by_team.csv"],
"readme": "nba-raptor/README.md"
}
```
It has:
- A `owner` which is going to be the github repo owner
- A `repo` which is going to be the github repo name
- A `branch` which is going to be the branch to which we need to get the files and the readme
- A list of `files` which is going to be a list of paths with files that you want to show to the world
- A `readme` which is going to be the path to your data description, it can also be a subpath eg: `example/README.md`
You can also add:
- A `description` which is useful if you have more than one dataset for each repo, if not provided we are just going to use the repo description
- A `Name` which is useful if you want to give your dataset a nice name, if not provided we are going to use the junction of the `owner` the `repo` + the path of the README, in the exaple above it will be `fivethirtyeight/data/nba-raptor`
### Extra commands
You can also build the project for production with:
```
npm run build
```
And run the production build with:
```
npm run start
```

View File

@@ -0,0 +1,20 @@
import Link from "next/link";
import HomeIcon from "../icons/HomeIcon";
export default function Breadcrumbs({ links }: { links: { title: string, href?: string, target?: string }[] }) {
const current = links.at(-1);
return <div className="flex items-center uppercase font-black text-xs">
<Link className="flex items-center" href='/'><HomeIcon /></Link>
{/* {links.length > 1 && links.slice(0, -1).map((link) => {
return <>
<span className="mx-4">/</span>
<Link href={link.href}>{link.title}</Link>
</>
})} */}
<span className="mx-4">/</span>
<span>{current.title}</span>
</div >
}

View File

@@ -0,0 +1,3 @@
export default function ExternalLinkIcon({ className = "" }) {
return <div className={`inline-block w-4 ${className}`}><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" fill="currentColor"><path d="M 40 10 C 38.896 10 38 10.896 38 12 C 38 13.104 38.896 14 40 14 L 47.171875 14 L 30.585938 30.585938 C 29.804938 31.366938 29.804938 32.633063 30.585938 33.414062 C 30.976938 33.805063 31.488 34 32 34 C 32.512 34 33.023063 33.805062 33.414062 33.414062 L 50 16.828125 L 50 24 C 50 25.104 50.896 26 52 26 C 53.104 26 54 25.104 54 24 L 54 12 C 54 10.896 53.104 10 52 10 L 40 10 z M 18 12 C 14.691 12 12 14.691 12 18 L 12 46 C 12 49.309 14.691 52 18 52 L 46 52 C 49.309 52 52 49.309 52 46 L 52 34 C 52 32.896 51.104 32 50 32 C 48.896 32 48 32.896 48 34 L 48 46 C 48 47.103 47.103 48 46 48 L 18 48 C 16.897 48 16 47.103 16 46 L 16 18 C 16 16.897 16.897 16 18 16 L 30 16 C 31.104 16 32 15.104 32 14 C 32 12.896 31.104 12 30 12 L 18 12 z"/></svg></div>
}

View File

@@ -0,0 +1,3 @@
export default function HomeIcon({ className = "" }) {
return <div className={`inline-block w-4 ${className}`}><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"> <path d="M 12 2 A 1 1 0 0 0 11.289062 2.296875 L 1.203125 11.097656 A 0.5 0.5 0 0 0 1 11.5 A 0.5 0.5 0 0 0 1.5 12 L 4 12 L 4 20 C 4 20.552 4.448 21 5 21 L 9 21 C 9.552 21 10 20.552 10 20 L 10 14 L 14 14 L 14 20 C 14 20.552 14.448 21 15 21 L 19 21 C 19.552 21 20 20.552 20 20 L 20 12 L 22.5 12 A 0.5 0.5 0 0 0 23 11.5 A 0.5 0.5 0 0 0 22.796875 11.097656 L 12.716797 2.3027344 A 1 1 0 0 0 12.710938 2.296875 A 1 1 0 0 0 12 2 z"/></svg></div>
}

View File

@@ -0,0 +1,44 @@
[
{
"owner": "datasets",
"branch": "main",
"repo": "oil-prices",
"files": [
"data/brent-daily.csv",
"data/brent-monthly.csv",
"data/brent-weekly.csv",
"data/brent-year.csv",
"data/wti-daily.csv",
"data/wti-monthly.csv",
"data/wti-weekly.csv",
"data/wti-year.csv"
],
"readme": "README.md"
},
{
"owner": "datasets",
"branch": "main",
"repo": "investor-flow-of-funds-us",
"files": [
"data/monthly.csv",
"data/weekly.csv"
],
"readme": "README.md"
},
{
"owner": "fivethirtyeight",
"repo": "data",
"branch": "master",
"description": "Data about bad drivers",
"name": "Bad Drivers",
"files": ["bad-drivers/bad-drivers.csv"],
"readme": "bad-drivers/README.md"
},
{
"owner": "fivethirtyeight",
"repo": "data",
"branch": "master",
"files": ["nba-raptor/historical_RAPTOR_by_player.csv", "nba-raptor/historical_RAPTOR_by_team.csv"],
"readme": "nba-raptor/README.md"
}
]

View File

@@ -0,0 +1,6 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
declare module '*.svg' {
const content: any;
export const ReactComponent: any;
export default content;
}

View File

@@ -0,0 +1,147 @@
import { Octokit } from 'octokit';
export interface GithubProject {
owner: string;
repo: string;
branch: string;
files: string[];
readme: string;
description?: string;
name?: string;
}
export async function getProjectReadme(
owner: string,
repo: string,
branch: string,
readme: string,
github_pat?: string
) {
const octokit = new Octokit({ auth: github_pat });
try {
const response = await octokit.rest.repos.getContent({
owner,
repo,
path: readme,
ref: branch,
});
const data = response.data as { content?: string };
const fileContent = data.content ? data.content : '';
if (fileContent === '') {
return null;
}
const decodedContent = Buffer.from(fileContent, 'base64').toString();
return decodedContent;
} catch (error) {
console.log(error);
return null;
}
}
export async function getLastUpdated(
owner: string,
repo: string,
branch: string,
readme: string,
github_pat?: string
) {
const octokit = new Octokit({ auth: github_pat });
try {
const response = await octokit.rest.repos.listCommits({
owner,
repo,
path: readme,
ref: branch,
});
return response.data[0].commit.committer.date;
} catch (error) {
console.log(error);
return null;
}
}
export async function getProjectMetadata(
owner: string,
repo: string,
github_pat?: string
) {
const octokit = new Octokit({ auth: github_pat });
try {
const response = await octokit.rest.repos.get({
owner,
repo,
});
return response.data;
} catch (error) {
console.log(error);
return null;
}
}
export async function getRepoContents(
owner: string,
repo: string,
branch: string,
files: string[],
github_pat?: string
) {
const octokit = new Octokit({ auth: github_pat });
try {
const contents = [];
for (const path of files) {
const response = await octokit.rest.repos.getContent({
owner,
repo,
ref: branch,
path: path,
});
const data = response.data as { download_url?: string, name: string, size: number };
contents.push({ download_url: data.download_url, name: data.name, size: data.size});
}
return contents;
} catch (error) {
console.log(error);
return null;
}
}
export async function getProject(project: GithubProject, github_pat?: string) {
const projectMetadata = await getProjectMetadata(
project.owner,
project.repo,
github_pat
);
if (!projectMetadata) {
return null;
}
const projectReadme = await getProjectReadme(
project.owner,
project.repo,
project.branch,
project.readme,
github_pat
);
if (!projectReadme) {
return null;
}
const projectData = await getRepoContents(
project.owner,
project.repo,
project.branch,
project.files,
github_pat
);
if (!projectData) {
return null;
}
const projectBase = project.readme.split('/').length > 1
? project.readme.split('/').slice(0, -1).join('/')
: '/'
const last_updated = await getLastUpdated(
project.owner,
project.repo,
project.branch,
projectBase,
github_pat
);
return { ...projectMetadata, files: projectData, readmeContent: projectReadme, last_updated, base_path: projectBase };
}

View File

@@ -0,0 +1,5 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />
// NOTE: This file should not be edited
// see https://nextjs.org/docs/basic-features/typescript for more information.

View File

@@ -0,0 +1,17 @@
const nextConfig = {
async rewrites() {
return {
beforeFiles: [
{
source: '/@:org/:project*',
destination: '/@org/:org/:project*',
},
],
};
},
serverRuntimeConfig: {
github_pat: process.env.GITHUB_PAT ? process.env.GITHUB_PAT : null,
},
};
module.exports = nextConfig;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,33 @@
{
"name": "my-app",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"@types/node": "18.16.0",
"@types/react": "18.0.38",
"@types/react-dom": "18.0.11",
"eslint": "8.39.0",
"eslint-config-next": "13.3.1",
"next": "13.3.1",
"next-seo": "^6.0.0",
"octokit": "^2.0.14",
"react": "18.2.0",
"react-dom": "18.2.0",
"react-markdown": "^8.0.7",
"react-timeago": "^7.1.0",
"remark-gfm": "^3.0.1",
"typescript": "5.0.4"
},
"devDependencies": {
"@tailwindcss/typography": "^0.5.9",
"autoprefixer": "^10.4.14",
"postcss": "^8.4.23",
"tailwindcss": "^3.3.1"
}
}

View File

@@ -0,0 +1,124 @@
import { NextSeo } from 'next-seo';
import { promises as fs } from 'fs';
import path from 'path';
import getConfig from 'next/config';
import { getProject, GithubProject } from '../../../lib/octokit';
import ReactMarkdown from 'react-markdown';
import remarkGfm from 'remark-gfm';
import Breadcrumbs from '../../../components/_shared/Breadcrumbs';
export default function ProjectPage({ project }) {
const repoId = `@${project.repo_config.owner}/${project.repo_config.repo}`
return (
<>
<NextSeo title={`${repoId}${project.base_path !== '/' ? '/' + project.base_path : ''} - GitHub Datasets`} />
<main className="prose mx-auto my-8">
<Breadcrumbs links={[{ title: repoId, href: "" }]} />
<h1 className="mb-0 mt-16">{project.repo_config.name || repoId}</h1>
<p className='mb-8'><span className='font-semibold'>Repository:</span> <a target="_blank" href={project.html_url}>{project.html_url}</a></p>
<h2 className="mb-0 mt-10">Files</h2>
<div className="inline-block min-w-full py-2 align-middle">
<table className="min-w-full divide-y divide-gray-300">
<thead>
<tr>
<th
scope="col"
className="px-3 py-3.5 text-left text-sm font-semibold text-gray-900"
>
Name
</th>
<th
scope="col"
className="px-3 py-3.5 text-left text-sm font-semibold text-gray-900"
>
Size
</th>
</tr>
</thead>
<tbody className="divide-y divide-gray-200">
{project.files.map((file) => (
<tr key={file.download_url}>
<td className="whitespace-nowrap px-3 py-4 text-sm text-gray-500">
<a href={file.download_url}>{file.name}</a>
</td>
<td className="whitespace-nowrap px-3 py-4 text-sm text-gray-500">
{file.size} Bytes
</td>
</tr>
))}
</tbody>
</table>
</div>
<hr />
<h2 className='uppercase font-black'>Readme</h2>
<ReactMarkdown remarkPlugins={[remarkGfm]}>
{project.readmeContent}
</ReactMarkdown>
</main>
</>
);
}
// Generates `/posts/1` and `/posts/2`
export async function getStaticPaths() {
const jsonDirectory = path.join(
process.cwd(),
'datasets.json'
);
const repos = await fs.readFile(jsonDirectory, 'utf8');
return {
paths: JSON.parse(repos).map((repo) => {
const projectPath =
repo.readme.split('/').length > 1
? repo.readme.split('/').slice(0, -1)
: null;
let path = [repo.repo];
if (projectPath) {
projectPath.forEach((element) => {
path.push(element);
});
}
return {
params: { org: repo.owner, path },
};
}),
fallback: false, // can also be true or 'blocking'
};
}
export async function getStaticProps({ params }) {
const jsonDirectory = path.join(
process.cwd(),
'datasets.json'
);
const reposFile = await fs.readFile(jsonDirectory, 'utf8');
const repos: GithubProject[] = JSON.parse(reposFile);
const repo = repos.find((_repo) => {
const projectPath =
_repo.readme.split('/').length > 1
? _repo.readme.split('/').slice(0, -1)
: null;
let path = [_repo.repo];
if (projectPath) {
projectPath.forEach((element) => {
path.push(element);
});
}
return (
_repo.owner == params.org &&
JSON.stringify(path) === JSON.stringify(params.path)
);
});
const github_pat = getConfig().serverRuntimeConfig.github_pat;
const project = await getProject(repo, github_pat);
return {
props: {
project: { ...project, repo_config: repo },
},
};
}

View File

@@ -0,0 +1,18 @@
import { AppProps } from 'next/app';
import Head from 'next/head';
import './styles.css';
function CustomApp({ Component, pageProps }: AppProps) {
return (
<>
<Head>
<title>GitHub Datasets</title>
</Head>
<main className="app">
<Component {...pageProps} />
</main>
</>
);
}
export default CustomApp;

View File

@@ -0,0 +1,117 @@
import { promises as fs } from 'fs';
import path from 'path';
import { getProject } from '../lib/octokit';
import getConfig from 'next/config';
import ExternalLinkIcon from '../components/icons/ExternalLinkIcon';
import TimeAgo from 'react-timeago';
import Link from 'next/link';
export async function getStaticProps() {
const jsonDirectory = path.join(
process.cwd(),
'/datasets.json'
);
const repos = await fs.readFile(jsonDirectory, 'utf8');
const github_pat = getConfig().serverRuntimeConfig.github_pat;
const projects = await Promise.all(
(JSON.parse(repos)).map(async (repo) => {
const project = await getProject(repo, github_pat);
return { ...project, repo_config: repo };
})
);
return {
props: {
projects,
},
};
}
export function Datasets({ projects }) {
return (
<div className="bg-white min-h-screen">
<div className="mx-auto max-w-7xl px-6 py-16 sm:py-24 lg:px-8">
<div className='text-center'>
<h2 className="text-3xl font-bold leading-10 tracking-tight">
GitHub Datasets
</h2>
<p className="mt-3 mx-auto max-w-2xl text-base leading-7 text-gray-500">
Data catalog with datasets hosted on GitHub by <Link target="_blank" className='underline' href="https://portaljs.org/">🌀 PortalJS</Link>
</p>
</div>
<div className="mt-20">
<div className="-mx-4 -my-2 overflow-x-auto sm:-mx-6 lg:-mx-8">
<div className="inline-block min-w-full py-2 align-middle sm:px-6 lg:px-8">
<table className="min-w-full divide-y divide-gray-300">
<thead>
<tr>
<th
scope="col"
className="px-3 py-3.5 text-left text-sm font-semibold text-gray-900"
>
Name
</th>
<th
scope="col"
className="px-3 py-3.5 text-left text-sm font-semibold text-gray-900"
>
Repository
</th>
<th
scope="col"
className="px-3 py-3.5 text-left text-sm font-semibold text-gray-900"
>
Description
</th>
<th
scope="col"
className="px-3 py-3.5 text-left text-sm font-semibold text-gray-900"
>
Last updated
</th>
<th
scope="col"
className="relative py-3.5 pl-3 pr-4 sm:pr-0"
></th>
</tr>
</thead>
<tbody className="divide-y divide-gray-200">
{projects.map((project) => (
<tr key={project.id}>
<td className="whitespace-nowrap px-3 py-6 text-sm text-gray-500">
{project.repo_config.name
? project.repo_config.name
: project.full_name + (project.base_path === '/' ? '' : '/' + project.base_path)}
</td>
<td className="whitespace-nowrap px-3 py-6 text-sm group text-gray-500 hover:text-gray-900 transition-all duration-250">
<a href={project.html_url} target="_blank" className='flex items-center'>@{project.full_name} <ExternalLinkIcon className='ml-1' /></a>
</td>
<td className="px-3 py-4 text-sm text-gray-500">
{project.repo_config.description
? project.repo_config.description
: project.description}
</td>
<td className="whitespace-nowrap px-3 py-6 text-sm text-gray-500">
<TimeAgo date={new Date(project.last_updated)} />
</td>
<td className="relative whitespace-nowrap py-6 pl-3 pr-4 text-right text-sm font-medium sm:pr-0">
<a
href={`/@${project.repo_config.owner}/${project.repo_config.repo}/${project.base_path === '/' ? '' : project.base_path}`}
className='border border-gray-900 text-gray-900 px-4 py-2 transition-all hover:bg-gray-900 hover:text-white'
>
info
</a>
</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
</div>
</div>
</div>
);
}
export default Datasets;

View File

@@ -0,0 +1,80 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
html {
-webkit-text-size-adjust: 100%;
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont,
Segoe UI, Roboto, Helvetica Neue, Arial, Noto Sans, sans-serif,
Apple Color Emoji, Segoe UI Emoji, Segoe UI Symbol, Noto Color Emoji;
line-height: 1.5;
tab-size: 4;
scroll-behavior: smooth;
}
body {
font-family: inherit;
line-height: inherit;
margin: 0;
}
h1,
h2,
p,
pre {
margin: 0;
}
*,
::before,
::after {
box-sizing: border-box;
border-width: 0;
border-style: solid;
border-color: currentColor;
}
h1,
h2 {
font-size: inherit;
font-weight: inherit;
}
a {
color: inherit;
text-decoration: inherit;
}
pre {
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas,
Liberation Mono, Courier New, monospace;
}
svg {
display: block;
vertical-align: middle;
shape-rendering: auto;
text-rendering: optimizeLegibility;
}
pre {
background-color: rgba(55, 65, 81, 1);
border-radius: 0.25rem;
color: rgba(229, 231, 235, 1);
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas,
Liberation Mono, Courier New, monospace;
overflow: scroll;
padding: 0.5rem 0.75rem;
}
.shadow {
box-shadow: 0 0 #0000, 0 0 #0000, 0 10px 15px -3px rgba(0, 0, 0, 0.1),
0 4px 6px -2px rgba(0, 0, 0, 0.05);
}
.rounded {
border-radius: 1.5rem;
}
.wrapper {
width: 100%;
}
.container {
margin-left: auto;
margin-right: auto;
max-width: 768px;
padding-bottom: 3rem;
padding-left: 1rem;
padding-right: 1rem;
color: rgba(55, 65, 81, 1);
width: 100%;
}

View File

@@ -0,0 +1,6 @@
module.exports = {
plugins: {
tailwindcss: {},
autoprefixer: {},
},
}

View File

@@ -0,0 +1,15 @@
/** @type {import('tailwindcss').Config} */
module.exports = {
content: [
"./app/**/*.{js,ts,jsx,tsx,mdx}",
"./pages/**/*.{js,ts,jsx,tsx,mdx}",
"./components/**/*.{js,ts,jsx,tsx,mdx}",
],
theme: {
extend: {},
},
plugins: [
require('@tailwindcss/typography')
],
}

View File

@@ -0,0 +1,20 @@
{
"compilerOptions": {
"target": "es5",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": false,
"forceConsistentCasingInFileNames": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "node",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"],
"exclude": ["node_modules"]
}