From 44c72424d6d53e3395eb9f529b5f6299c20671b2 Mon Sep 17 00:00:00 2001 From: blundellc Date: Sat, 13 Sep 2008 13:20:12 +0000 Subject: [PATCH] license: hashes of freeform licenses are stored in the database and these hashes used for auto-acceptance. freeform licenses may be files, or may be the contents of the License: field in the R DESCRIPTION. such text is mapped to lower case and all space characters are compressed and mapped to a single space. a nicer interface for adding these freeform licenses is introduced. after reviewing the license, its hash may be added as follows: $ cran2deb license license> add uroot gpl (maps hash of whatever freeform license uroot has to gpl) ... $ cran2deb build uroot (success is assured!) git-svn-id: svn://svn.r-forge.r-project.org/svnroot/cran2deb@73 edb9625f-4e0d-4859-8d74-9fd3b1da38cb --- pkg/trunk/R/db.R | 45 +++++++-------- pkg/trunk/R/license.R | 95 +++++++++++++++++++++++++------- pkg/trunk/data/populate_licenses | 16 +++--- pkg/trunk/exec/license | 61 ++++++++++++++++---- 4 files changed, 156 insertions(+), 61 deletions(-) diff --git a/pkg/trunk/R/db.R b/pkg/trunk/R/db.R index 5067273..c802bae 100644 --- a/pkg/trunk/R/db.R +++ b/pkg/trunk/R/db.R @@ -15,10 +15,10 @@ db_start <- function() { ,',accept INT NOT NULL' ,')')) } - if (!dbExistsTable(con,'license_files')) { - dbGetQuery(con,paste('CREATE TABLE license_files (' + if (!dbExistsTable(con,'license_hashes')) { + dbGetQuery(con,paste('CREATE TABLE license_hashes (' ,' name TEXT NOT NULL' - ,',file_sha1 TEXT PRIMARY KEY NOT NULL' + ,',sha1 TEXT PRIMARY KEY NOT NULL' ,')')) } return(con) @@ -36,7 +36,7 @@ db_sysreq_override <- function(sysreq_text) { sysreq_text <- tolower(sysreq_text) con <- db_start() results <- dbGetQuery(con,paste( - 'SELECT debian_name FROM sysreq_override WHERE' + 'SELECT DISTINCT debian_name FROM sysreq_override WHERE' ,db_quote(sysreq_text),'GLOB r_pattern')) db_stop(con) if (length(results) == 0) { @@ -70,7 +70,7 @@ db_license_override_name <- function(name) { name <- tolower(name) con <- db_start() results <- dbGetQuery(con,paste( - 'SELECT accept FROM license_override WHERE' + 'SELECT DISTINCT accept FROM license_override WHERE' ,db_quote(name),'= name')) db_stop(con) if (length(results) == 0) { @@ -95,39 +95,40 @@ db_add_license_override <- function(name,accept) { db_stop(con) } -db_license_override_file <- function(file_sha1) { - file_sha1 <- tolower(file_sha1) +db_license_override_hash <- function(license_sha1) { + license_sha1 <- tolower(license_sha1) con <- db_start() results <- dbGetQuery(con,paste( - 'SELECT name,accept FROM license_override' - ,'INNER JOIN license_files' - ,'ON license_files.name = license_override.name WHERE' - ,db_quote(file_sha1),'= license_files.file_sha1')) + 'SELECT DISTINCT accept FROM license_override' + ,'INNER JOIN license_hashes' + ,'ON license_hashes.name = license_override.name WHERE' + ,db_quote(license_sha1),'= license_hashes.sha1')) db_stop(con) - # TODO: change accept from 0,1 into FALSE,TRUE - # TODO: NULL -> NA - return(results) + if (length(results) == 0) { + return(NA) + } + return(as.logical(results$accept)) } db_license_overrides <- function() { con <- db_start() overrides <- dbGetQuery(con,paste('SELECT * FROM license_override')) - files <- dbGetQuery(con,paste('SELECT * FROM license_files')) + hashes <- dbGetQuery(con,paste('SELECT * FROM license_hashes')) db_stop(con) # TODO: change accept from 0,1 into FALSE,TRUE - return(list(overrides=overrides,files=files)) + return(list(overrides=overrides,hashes=hashes)) } -db_add_license_file <- function(name,file_sha1) { +db_add_license_hash <- function(name,license_sha1) { name <- tolower(name) - file_sha1 <- tolower(file_sha1) - message(paste('adding file',file_sha1,'for',name)) + license_sha1 <- tolower(license_sha1) + message(paste('adding hash',license_sha1,'for',name)) con <- db_start() dbGetQuery(con,paste( - 'INSERT OR REPLACE INTO license_files' - ,'(name, file_sha1) VALUES (' + 'INSERT OR REPLACE INTO license_hashes' + ,'(name, sha1) VALUES (' ,' ',db_quote(name) - ,',',db_quote(file_sha1) + ,',',db_quote(license_sha1) ,')')) db_stop(con) } diff --git a/pkg/trunk/R/license.R b/pkg/trunk/R/license.R index 6f04aa6..c8a3dde 100644 --- a/pkg/trunk/R/license.R +++ b/pkg/trunk/R/license.R @@ -1,17 +1,46 @@ is_acceptable_license <- function(license) { - # determine if license is acceptable + # determine if license text is acceptable + + if (length(grep('^file ',license))) { + # skip file licenses + return(FALSE) + } + license <- license_text_reduce(license) + action = db_license_override_name(license) + if (!is.na(action)) { + return(action) + } + license <- license_text_further_reduce(license) + action = db_license_override_name(license) + if (!is.na(action)) { + message(paste('W: Accepting/rejecting wild license as',license,'. FIX THE PACKAGE!')) + return(action) + } + # TODO: file {LICENSE,LICENCE} (+ maybe COPYING?) + message(paste('E: Wild license',license,'did not match classic rules; rejecting')) + return(F) +} + +license_text_reduce <- function(license) { + # these reduction steps are sound for all conformant R license + # specifications. # compress spaces into a single space - license = gsub('[[:blank:]]+',' ',license) + license = gsub('[[:space:]]+',' ',license) # make all characters lower case license = tolower(license) # don't care about versions of licenses license = chomp(sub('\\( ?[<=>!]+ ?[0-9.-]+ ?\\)','' ,sub('-[0-9.-]+','',license))) - action = db_license_override_name(license) - if (!is.na(action)) { - return(action) - } + # remove any extra space introduced + license = chomp(gsub('[[:space:]]+',' ',license)) + return(license) +} + +license_text_further_reduce <- function(license) { + # these reduction steps are heuristic and may lead to + # in correct acceptances, if care is not taken. + # uninteresting urls license = gsub('http://www.gnu.org/[[:alnum:]/._-]*','',license) license = gsub('http://www.x.org/[[:alnum:]/._-]*','',license) @@ -27,28 +56,50 @@ is_acceptable_license <- function(license) { license = gsub('licen[sc]e','',license) license = gsub('(gnu )?(gpl|general public)','gpl',license) license = gsub('(mozilla )?(mpl|mozilla public)','mpl',license) - # remove any extra space introduced - license = chomp(gsub('[[:space:]]+',' ',license)) - action = db_license_override_name(license) - if (!is.na(action)) { - message(paste('W: Accepting/rejecting wild license as',license,'. FIX THE PACKAGE!')) - return(action) - } # remove everything that looks like a version specification license = gsub('(ver?sion|v)? *[0-9.-]+ *(or *(higher|later|newer|greater|above))?','' ,license) # remove any extra space introduced license = chomp(gsub('[[:space:]]+',' ',license)) - action = db_license_override_name(license) - if (!is.na(action)) { - message(paste('W: Accepting/rejecting wild license as',license,'. FIX THE PACKAGE!')) - return(action) + return(license) +} + +license_text_hash_reduce <- function(text) { + # reduction of license text, suitable for hashing. + return(chomp(tolower(gsub('[[:space:]]+',' ',text)))) +} + +get_license_hash <- function(pkg,license) { + license <- license_text_reduce(license) + if (length(grep('^file ',license))) { + if (length(grep('^file LICEN[CS]E$',license))) { + path = gsub('file ','',license) + path = file.path(pkg$path, path) + license <- license_text_reduce(readChar(path,file.info(path)$size)) + } else { + message(paste('E: invalid license file specification',license)) + return(NA) + } } - # TODO: file {LICENSE,LICENCE} (+ maybe COPYING?) - message(paste('E: Wild license',license,'did not match; rejecting')) - return(F) + return(digest(license,algo='sha1',serialize=FALSE)) +} + +is_acceptable_hash_license <- function(pkg,license) { + license_sha1 <- get_license_hash(pkg,license) + if (is.na(license_sha1)) { + return(FALSE) + } + action = db_license_override_hash(license_sha1) + if (is.na(action)) { + action = FALSE + } + if (action) { + message(paste('W: Wild license',license,'accepted via hash',license_sha1)) + } + return(action) } + accept_license <- function(pkg) { # check the license if (!('License' %in% names(pkg$description[1,]))) { @@ -61,6 +112,10 @@ accept_license <- function(pkg) { accept=license break } + if (is_acceptable_hash_license(pkg,license)) { + accept=license + break + } } if (is.null(accept)) { stop(paste('No acceptable license:',pkg$description[1,'License'])) diff --git a/pkg/trunk/data/populate_licenses b/pkg/trunk/data/populate_licenses index 2c46922..70e941c 100644 --- a/pkg/trunk/data/populate_licenses +++ b/pkg/trunk/data/populate_licenses @@ -1,16 +1,16 @@ add GPL -file GPL /usr/share/common-licenses/GPL-2 -file GPL /usr/share/common-licenses/GPL-3 +hash GPL /usr/share/common-licenses/GPL-2 +hash GPL /usr/share/common-licenses/GPL-3 add LGPL -file LGPL /usr/share/common-licenses/LGPL-2 -file LGPL /usr/share/common-licenses/LGPL-2.1 -file LGPL /usr/share/common-licenses/LGPL-3 +hash LGPL /usr/share/common-licenses/LGPL-2 +hash LGPL /usr/share/common-licenses/LGPL-2.1 +hash LGPL /usr/share/common-licenses/LGPL-3 add BSD -file BSD /usr/share/common-licenses/BSD +hash BSD /usr/share/common-licenses/BSD add ARTISTIC -file ARTISTIC /usr/share/common-licenses/Artistic +hash ARTISTIC /usr/share/common-licenses/Artistic add APACHE -file APACHE /usr/share/common-licenses/Apache-2.0 +hash APACHE /usr/share/common-licenses/Apache-2.0 add AGPL add MIT add X11 diff --git a/pkg/trunk/exec/license b/pkg/trunk/exec/license index 4f6b01a..5263879 100755 --- a/pkg/trunk/exec/license +++ b/pkg/trunk/exec/license @@ -5,7 +5,13 @@ suppressPackageStartupMessages(library(digest)) exec_cmd <- function(argc, argv) { usage <- function() - message('usage: add [reject]|file |ls|quit|help') + message(paste('usage: add [reject]' + ,' hash (|)' + ,' accept ' + ,' reject ' + ,' ls' + ,' quit' + ,sep='\n')) if (argc < 1) { exit() @@ -17,28 +23,61 @@ exec_cmd <- function(argc, argv) { usage() return() } - accept = (argc != 3) - db_add_license_override(argv[2],accept) - } else if (cmd == 'file') { + action = (argc != 3) + db_add_license_override(argv[2],action) + } else if (cmd == 'hash') { if (argc != 3) { usage() return() } license = argv[2] path = argv[3] - if (is.null(db_license_override_name(license))) { - message(paste('license',license,'is not known')) + if (is.na(db_license_override_name(license))) { + message(paste('E: license',license,'is not known; add it first')) return() } if (file.exists(path)) { - file_sha1 = digest(readChar(path,file.info(path)$size) - ,algo='sha1', serialize=FALSE) + license_sha1 = digest(readChar(path,file.info(path)$size) + ,algo='sha1', serialize=FALSE) } else if (length(grep('^[0-9a-f]{40}$',path))) { - file_sha1 = path + license_sha1 = path } else { - stop(paste(path,'does not exist and does not look like an SHA1 hash')) + message(paste('E:',path,'does not exist and does not look like an SHA1 hash')) + return() + } + db_add_license_hash(license,license_sha1) + } else if (cmd == 'reject' || cmd == 'accept') { + if (argc != 3) { + usage() + return() + } + pkg_name <- argv[2] + license <- argv[3] + current_action <- db_license_override_name(license) + action = (cmd == 'accept') + if (is.na(current_action)) { + message(paste('N: license',license,'is not known; adding it')) + db_add_license_override(license,action) + } else if (action != current_action) { + message(paste('E: differing actions propose for license',license)) + return() + } + tmp <- setup() + success <- try((function() { + pkg <- prepare_pkg(tmp,pkg_name) + if (!('License' %in% names(pkg$description[1,]))) { + message(paste('E: package',pkg$name,'has no License: field in DESCRIPTION')) + return() + } + first_license = (strsplit(chomp(pkg$description[1,'License']) + ,'[[:space:]]*\\|[[:space:]]*')[[1]])[1] + license_sha1 <- get_license_hash(pkg,first_license) + db_add_license_hash(license,license_sha1) + })()) + cleanup(tmp) + if (inherits(success,'try-error')) { + stop(call.=F) } - db_add_license_file(license,file_sha1) } else if (cmd == 'ls') { for (x in db_license_overrides()) print(x) } else if (cmd == 'help') { -- 2.39.5