[examples/openspending][m] - added loader + fetching from datapackage

- Also added an indexing example
This commit is contained in:
Luccas Mateus de Medeiros Gomes 2023-05-17 14:57:50 -03:00
parent ebcb93c996
commit 902e5e07a0
11 changed files with 2051 additions and 87 deletions

View File

@ -0,0 +1,45 @@
import { Octokit } from 'octokit';
import { assert, expect, test } from 'vitest'
import { getProjectDataPackage } from '../lib/octokit';
export async function getAllDataPackagesFromOrg(
org: string,
branch?: string,
github_pat?: string
) {
const octokit = new Octokit({ auth: github_pat });
const repos = await octokit.rest.repos.listForOrg({ org, type: 'public', per_page: 100 });
let failedDataPackages = [];
const datapackages = await Promise.all(
repos.data.map(async (_repo) => {
const datapackage = await getProjectDataPackage(
org,
_repo.name,
branch ? branch : 'main',
github_pat
);
if (!datapackage) {
failedDataPackages.push(_repo.name)
return null
};
return {...datapackage, repo: _repo.name};
})
);
return {
datapackages: datapackages.filter((item) => item !== null),
failedDataPackages,
};
}
test('Test OS-Data', async () => {
const repos = await getAllDataPackagesFromOrg('os-data', 'main', process.env.VITE_GITHUB_PAT)
if (repos.failedDataPackages.length > 0) console.log(repos.failedDataPackages)
expect(repos.failedDataPackages.length).toBe(0)
}, {timeout: 100000})
test('Test Gift-Data', async () => {
const repos = await getAllDataPackagesFromOrg('gift-data', 'main', process.env.VITE_GITHUB_PAT)
if (repos.failedDataPackages.length > 0) console.log(repos.failedDataPackages)
expect(repos.failedDataPackages.length).toBe(0)
}, {timeout: 100000})

View File

@ -2,24 +2,26 @@
{
"owner": "os-data",
"branch": "main",
"repo": "mongolia-budget-2016-2017",
"files": [
"data/mongolia-2017.csv",
"data/mongolia-2017__2017.csv"
]
"name": "mongolia-budget-2016-2017"
},
{
"owner": "os-data",
"branch": "main",
"repo": "gb-country-regional-analysis",
"files": [
"data/cofog.csv",
"data/cofog_dejargonise.csv",
"data/cra.csv",
"data/departments.csv",
"data/nuts_pop.csv",
"data/pogs.csv"
],
"readme": "README.md"
"name": "gb-country-regional-analysis"
},
{
"owner": "os-data",
"branch": "main",
"name": "berlin-berlin"
},
{
"owner": "os-data",
"branch": "main",
"name": "state-of-minas-gerais-brazil-planned-budget"
},
{
"owner": "os-data",
"branch": "main",
"name": "wesel"
}
]

View File

@ -0,0 +1,288 @@
/**
* Fiscal Data Package is a simple specification for data access and delivery of fiscal data.
*/
export type FiscalDataPackage = TabularDataPackage & {
countryCode?: ISO31661Alpha2CountryCode
regionCode?: string
cityCode?: string
author?: string
readme?: string
granularity?: GranularityOfResources
fiscalPeriod?: FiscalPeriodForTheBudget
[k: string]: unknown
}
/**
* The profile of this descriptor.
*/
export type Profile = "tabular-data-package"
/**
* An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.
*/
export type Name = string
/**
* A property reserved for globally unique identifiers. Examples of identifiers that are unique include UUIDs and DOIs.
*/
export type ID = string
/**
* A human-readable title.
*/
export type Title = string
/**
* A text description. Markdown is encouraged.
*/
export type Description = string
/**
* The home on the web that is related to this data package.
*/
export type HomePage = string
/**
* The datetime on which this descriptor was created.
*/
export type Created = string
/**
* The contributors to this descriptor.
*/
export type Contributors = [Contributor, ...Contributor[]]
/**
* A human-readable title.
*/
export type Title1 = string
/**
* A fully qualified URL, or a POSIX file path.
*/
export type Path = string
/**
* An email address.
*/
export type Email = string
/**
* An organizational affiliation for this contributor.
*/
export type Organization = string
/**
* A list of keywords that describe this package.
*/
export type Keywords = [string, ...string[]]
/**
* A image to represent this package.
*/
export type Image = string
/**
* The license(s) under which this package is published.
*/
export type Licenses = [License, ...License[]]
/**
* A license for this descriptor.
*/
export type License =
| {
[k: string]: unknown
}
| {
[k: string]: unknown
}
/**
* An `array` of Tabular Data Resource objects, each compliant with the [Tabular Data Resource](/tabular-data-resource/) specification.
*
/**
* A Tabular Data Resource.
*/
export interface TabularDataResource {
format?: string;
name: string;
description?: string;
title?: string;
schema?: Schema;
sample?: any[];
profile?: string;
key?: string;
path?: string;
size?: number;
}
export interface Field {
name: string;
type: FieldType;
}
export interface Schema {
fields: Field[];
}
export const OptionsFields = [
"any",
"array",
"boolean",
"date",
"datetime",
"duration",
"geojson",
"geopoint",
"integer",
"number",
"object",
"string",
"time",
"year",
"yearmonth",
] as const;
type FieldType = typeof OptionsFields[number];
/**
* A human-readable title.
*/
export type Title2 = string
/**
* A fully qualified URL, or a POSIX file path.
*/
export type Path1 = string
/**
* An email address.
*/
export type Email1 = string
/**
* The raw sources for this resource.
*/
export type Sources = Source[]
/**
* A keyword that represents the direction of the spend, either expenditure or revenue.
*/
export type DirectionOfTheSpending = "expenditure" | "revenue"
/**
* A keyword that represents the phase of the data, can be proposed for a budget proposal, approved for an approved budget, adjusted for modified budget or executed for the enacted budget
*/
export type BudgetPhase = "proposed" | "approved" | "adjusted" | "executed"
/**
* Either an array of strings corresponding to the name attributes in a set of field objects in the fields array or a single string corresponding to one of these names. The value of primaryKey indicates the primary key or primary keys for the dimension.
*/
export type PrimaryKey = string | [string, ...string[]]
/**
* Describes what kind of a dimension it is.
*/
export type DimensionType =
| "datetime"
| "entity"
| "classification"
| "activity"
| "fact"
| "location"
| "other"
/**
* The type of the classification.
*/
export type ClassificationType = "functional" | "administrative" | "economic"
/**
* A valid 2-digit ISO country code (ISO 3166-1 alpha-2), or, an array of valid ISO codes.
*/
export type ISO31661Alpha2CountryCode = string | [string, ...string[]]
/**
* A keyword that represents the type of spend data, eiter aggregated or transactional
*/
export type GranularityOfResources = "aggregated" | "transactional"
/**
* Tabular Data Package
*/
export interface TabularDataPackage {
profile: Profile
name?: Name
id?: ID
title?: Title
description?: Description
homepage?: HomePage
created?: Created
contributors?: Contributors
keywords?: Keywords
image?: Image
licenses?: Licenses
resources: TabularDataResource[]
sources?: Sources
[k: string]: unknown
}
/**
* A contributor to this descriptor.
*/
export interface Contributor {
title: Title1
path?: Path
email?: Email
organization?: Organization
role?: string
[k: string]: unknown
}
/**
* A source file.
*/
export interface Source {
title: Title2
path?: Path1
email?: Email1
[k: string]: unknown
}
/**
* Measures are numerical and correspond to financial amounts in the source data.
*/
export interface Measures {
[k: string]: Measure
}
/**
* Measure.
*
* This interface was referenced by `Measures`'s JSON-Schema definition
* via the `patternProperty` "^\w+".
*/
export interface Measure {
source: string
resource?: string
currency: string
factor?: number
direction?: DirectionOfTheSpending
phase?: BudgetPhase
[k: string]: unknown
}
/**
* Dimensions are groups of related fields. Dimensions cover all items other than the measure.
*/
export interface Dimensions {
[k: string]: Dimension
}
/**
* Dimension.
*
* This interface was referenced by `Dimensions`'s JSON-Schema definition
* via the `patternProperty` "^\w+".
*/
export interface Dimension {
attributes: Attributes
primaryKey: PrimaryKey
dimensionType?: DimensionType
classificationType?: ClassificationType
[k: string]: unknown
}
/**
* Attribute objects that make up the dimension
*/
export interface Attributes {
/**
* This interface was referenced by `Attributes`'s JSON-Schema definition
* via the `patternProperty` "^\w+".
*/
[k: string]: {
source: string
resource?: string
constant?: string | number
parent?: string
labelfor?: string
[k: string]: unknown
}
}
/**
* The fiscal period of the dataset
*/
export interface FiscalPeriodForTheBudget {
start: string
end?: string
[k: string]: unknown
}

View File

@ -0,0 +1,31 @@
import { FiscalDataPackage } from './datapackage.interface';
import { Project } from './project.interface';
export function loadDataPackage(
datapackage: FiscalDataPackage,
owner: string,
repo: string
): Project {
return {
name: datapackage.name,
owner: { name: owner },
repo: { name: repo },
files: datapackage.resources,
author: datapackage.author ? datapackage.author : null,
cityCode: datapackage.cityCode ? datapackage.cityCode : null,
countryCode: datapackage.countryCode
? (datapackage.countryCode as string)
: null,
fiscalPeriod: datapackage.fiscalPeriod
? { start: datapackage.fiscalPeriod.start
? datapackage.fiscalPeriod.start
: null,
end: datapackage.fiscalPeriod.end
? datapackage.fiscalPeriod.end
: null,
}
: null,
readme: datapackage.readme ? datapackage.readme : '',
datapackage
};
}

View File

@ -140,7 +140,8 @@ export async function getProject(project: GithubProject, github_pat?: string) {
return null;
}
let projectBase = "", last_updated = "";
let projectBase = '',
last_updated = '';
if (projectReadme) {
projectBase =
project.readme.split('/').length > 1
@ -162,3 +163,30 @@ export async function getProject(project: GithubProject, github_pat?: string) {
base_path: projectBase,
};
}
export async function getProjectDataPackage(
owner: string,
repo: string,
branch: string,
github_pat?: string
) {
const octokit = new Octokit({ auth: github_pat });
try {
const response = await octokit.rest.repos.getContent({
owner,
repo,
path: 'datapackage.json',
ref: branch,
});
const data = response.data as { content?: string };
const fileContent = data.content ? data.content : '';
if (fileContent === '') {
return null;
}
const decodedContent = Buffer.from(fileContent, 'base64').toString();
const datapackage = JSON.parse(decodedContent);
return {...datapackage, repo };
} catch (error) {
return null;
}
}

View File

@ -0,0 +1,18 @@
import { FiscalDataPackage, TabularDataResource } from "./datapackage.interface";
export interface Project {
owner: { name: string; logo?: string }; // Info about the owner of the data repo
repo: { name: string; logo?: string }; // Info about the the data repo
files: TabularDataResource[];
name: string;
title?: string;
author?: string;
cityCode?: string;
countryCode?: string;
fiscalPeriod?: {
start: string;
end: string;
};
readme?: string;
datapackage: FiscalDataPackage
}

File diff suppressed because it is too large Load Diff

View File

@ -6,21 +6,27 @@
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
"lint": "next lint",
"test": "vitest"
},
"dependencies": {
"@octokit/plugin-throttling": "^5.2.2",
"@types/flexsearch": "^0.7.3",
"@types/node": "18.16.0",
"@types/react": "18.0.38",
"@types/react-dom": "18.0.11",
"@vitejs/plugin-react": "^4.0.0",
"clsx": "^1.2.1",
"eslint": "8.39.0",
"eslint-config-next": "13.3.1",
"flexsearch": "0.7.21",
"next": "13.3.1",
"next-seo": "^6.0.0",
"octokit": "^2.0.14",
"prettier": "^2.8.8",
"react": "18.2.0",
"react-dom": "18.2.0",
"react-hook-form": "^7.43.9",
"react-markdown": "^8.0.7",
"react-timeago": "^7.1.0",
"remark-gfm": "^3.0.1",
@ -30,6 +36,7 @@
"@tailwindcss/typography": "^0.5.9",
"autoprefixer": "^10.4.14",
"postcss": "^8.4.23",
"tailwindcss": "^3.3.1"
"tailwindcss": "^3.3.1",
"vitest": "^0.31.0"
}
}

View File

@ -38,7 +38,7 @@ export default function ProjectPage({ project }) {
</tr>
</thead>
<tbody className="divide-y divide-gray-200">
{project.files.map((file) => (
{project.files?.map((file) => (
<tr key={file.download_url}>
<td className="whitespace-nowrap px-3 py-4 text-sm text-gray-500">
<a href={file.download_url}>{file.name}</a>
@ -79,7 +79,7 @@ export async function getStaticPaths() {
repo.readme && repo.readme.split('/').length > 1
? repo.readme.split('/').slice(0, -1)
: null;
let path = [repo.repo];
let path = [repo.name];
if (projectPath) {
projectPath.forEach((element) => {
path.push(element);
@ -105,7 +105,7 @@ export async function getStaticProps({ params }) {
_repo.readme && _repo.readme.split('/').length > 1
? _repo.readme.split('/').slice(0, -1)
: null;
let path = [_repo.repo];
let path = [_repo.name];
if (projectPath) {
projectPath.forEach((element) => {
path.push(element);

View File

@ -1,6 +1,6 @@
import { promises as fs } from 'fs';
import path from 'path';
import { getProject } from '../lib/octokit';
import { GithubProject, getProjectDataPackage } from '../lib/octokit';
import getConfig from 'next/config';
import ExternalLinkIcon from '../components/icons/ExternalLinkIcon';
import TimeAgo from 'react-timeago';
@ -8,22 +8,26 @@ import Link from 'next/link';
import { Hero } from '../components/Hero';
import { Header } from '../components/Header';
import { Container } from '../components/Container';
import { FiscalDataPackage } from '../lib/datapackage.interface';
import { loadDataPackage } from '../lib/loader';
import { Project } from '../lib/project.interface';
import { Index } from 'flexsearch';
import { useForm } from 'react-hook-form';
export async function getStaticProps() {
const jsonDirectory = path.join(
process.cwd(),
'/datasets.json'
);
const jsonDirectory = path.join(process.cwd(), '/datasets.json');
const repos = await fs.readFile(jsonDirectory, 'utf8');
const github_pat = getConfig().serverRuntimeConfig.github_pat;
const projects = await Promise.all(
(JSON.parse(repos)).map(async (repo) => {
const project = await getProject(repo, github_pat);
return { ...project, repo_config: repo };
})
const datapackages = await Promise.all(
JSON.parse(repos).map(
async (_repo: GithubProject) =>
await getProjectDataPackage(_repo.owner, _repo.name, 'main', github_pat)
)
);
const projects = datapackages.map(
(datapackage: FiscalDataPackage & { repo: string }) =>
loadDataPackage(datapackage, 'os-data', datapackage.name)
);
return {
props: {
projects,
@ -32,13 +36,23 @@ export async function getStaticProps() {
}
export function Datasets({ projects }) {
const index = new Index({ tokenize: 'full' });
projects.forEach((project: Project) =>
index.add(
project.name,
`${project.repo} ${project.name} ${project.title} ${project.author} ${project.title} ${project.cityCode} ${project.fiscalPeriod?.start} ${project.fiscalPeriod?.end}`
)
);
const { register, watch, handleSubmit, reset } = useForm({
defaultValues: {
searchTerm: '',
},
});
return (
<div className="bg-white min-h-screen">
<Header />
<Hero />
<section
className="py-20 sm:py-32"
>
<section className="py-20 sm:py-32">
<Container>
<div className="mx-auto max-w-2xl lg:mx-0">
<h2
@ -52,6 +66,14 @@ export function Datasets({ projects }) {
</p>
</div>
<div className="mt-5">
<div className="mt-6 flex flex-col gap-3 sm:flex-row">
<input
placeholder="Search here"
aria-label="Hate speech on Twitter"
{...register('searchTerm')}
className="min-w-0 flex-auto appearance-none rounded-md border border-zinc-900/10 bg-white px-3 py-[calc(theme(spacing.2)-1px)] shadow-md shadow-zinc-800/5 placeholder:text-zinc-600 focus:border-emerald-500 focus:outline-none focus:ring-4 focus:ring-emerald-500/10 sm:text-sm"
/>
</div>
<div className="-mx-4 -my-2 overflow-x-auto sm:-mx-6 lg:-mx-8">
<div className="inline-block min-w-full py-2 align-middle sm:px-6 lg:px-8">
<table className="min-w-full divide-y divide-gray-300">
@ -73,13 +95,13 @@ export function Datasets({ projects }) {
scope="col"
className="px-3 py-3.5 text-left text-sm font-semibold text-gray-900"
>
Description
Author
</th>
<th
scope="col"
className="px-3 py-3.5 text-left text-sm font-semibold text-gray-900"
>
Last updated
Fiscal Year
</th>
<th
scope="col"
@ -88,34 +110,52 @@ export function Datasets({ projects }) {
</tr>
</thead>
<tbody className="divide-y divide-gray-200">
{projects.map((project) => (
<tr key={project.id}>
<td className="whitespace-nowrap px-3 py-6 text-sm text-gray-500">
{project.repo_config.name
? project.repo_config.name
: project.full_name + (project.base_path === '/' ? '' : '/' + project.base_path)}
</td>
<td className="whitespace-nowrap px-3 py-6 text-sm group text-gray-500 hover:text-gray-900 transition-all duration-250">
<a href={project.html_url} target="_blank" className='flex items-center'>@{project.full_name} <ExternalLinkIcon className='ml-1' /></a>
</td>
<td className="px-3 py-4 text-sm text-gray-500">
{project.repo_config.description
? project.repo_config.description
: project.description}
</td>
<td className="whitespace-nowrap px-3 py-6 text-sm text-gray-500">
<TimeAgo date={new Date(project.last_updated)} />
</td>
<td className="relative whitespace-nowrap py-6 pl-3 pr-4 text-right text-sm font-medium sm:pr-0">
<a
href={`/@${project.repo_config.owner}/${project.repo_config.repo}/${project.base_path === '/' ? '' : project.base_path}`}
className='border border-gray-900 text-gray-900 px-4 py-2 transition-all hover:bg-gray-900 hover:text-white'
>
info
</a>
</td>
</tr>
))}
{projects
.filter((project: Project) =>
watch().searchTerm && watch().searchTerm !== ''
? index
.search(watch().searchTerm)
.includes(project.name)
: true
)
.map((project: Project) => (
<tr key={project.name}>
<td className="whitespace-nowrap px-3 py-6 text-sm text-gray-500">
{project.name}
</td>
<td className="whitespace-nowrap px-3 py-6 text-sm group text-gray-500 hover:text-gray-900 transition-all duration-250">
<a
href={`https://github.com/${project.owner.name}/${project.repo.name}`}
target="_blank"
className="flex items-center"
>
@{project.owner.name}/{project.repo.name}{' '}
<ExternalLinkIcon className="ml-1" />
</a>
</td>
<td className="px-3 py-4 text-sm text-gray-500">
{project.author}
</td>
{project.fiscalPeriod ? (
<td className="whitespace-nowrap px-3 py-6 text-sm text-gray-500">
{project.fiscalPeriod.start} -{' '}
{project.fiscalPeriod.end}
</td>
) : (
<td className="whitespace-nowrap px-3 py-6 text-sm text-gray-500">
No data
</td>
)}
<td className="relative whitespace-nowrap py-6 pl-3 pr-4 text-right text-sm font-medium sm:pr-0">
<a
href={`/@${project.owner.name}/${project.repo.name}/`}
className="border border-gray-900 text-gray-900 px-4 py-2 transition-all hover:bg-gray-900 hover:text-white"
>
info
</a>
</td>
</tr>
))}
</tbody>
</table>
</div>

View File

@ -0,0 +1,10 @@
import { defineConfig } from 'vitest/config'
import react from '@vitejs/plugin-react'
// https://vitejs.dev/config/
export default defineConfig({
plugins: [react()],
test: {
environment: 'jsdom',
},
})