###Overarching Structure### #? are there ways to hyperlink this text for easier navigation?
#1.exploratory
##1a = multivar with broad factors
##1b = univariate of each individual with specific factors
#2. multivar with relevant factors
#3. interaction between reuse and sharing (also make sure this covers (or cover independently)...trends in articles that do vs. those that don't share)--->see #7 above
#4. univariate of dominant suspected trends
#5. sampling artifacts

##TO TRY:
#Sarah, fyi ran into this R func while reading some stuff with nic:
#relevel in base package
#To reorder levels of a factor, making one the reference.

##TO TRY:
#iterative automation
###not necessary for this analysis b/c I like seeing them one by one.

#Load library ----------------------------------------------------------------------------------------------------------------------------------------
library(Design)

#Load Tables and Confirm Factor Names ----------------------------------------------------------------------------------------------------------------------------------------
# Read the six CSV inputs and print the structure of each one so the factor
# (column) names can be confirmed before they are used further down.
SnapReuse <- read.csv("ReuseDatasetsSnap.csv")
str(SnapReuse)
TimeReuse <- read.csv("ReuseDatasetsTime.csv")
str(TimeReuse)
AllReuse <- read.csv("ReuseDatasetsAll.csv")
str(AllReuse)
SnapShare <- read.csv("SharingDatasetsSnap.csv")
str(SnapShare)
TimeShare <- read.csv("SharingDatasetsTime.csv")
str(TimeShare)
AllShare <- read.csv("SharingDatasetsAll.csv")
str(AllShare)


#Attach Input  ----------------------------------------------------------------------------------------------------------------------------------------
# Put the columns of AllReuse on the search path; the xtabs() calls further
# down name columns directly and pass no `data` argument, so they rely on this.
attach(AllReuse)

#Descriptive Tables of Data  ----------------------------------------------------------------------------------------------------------------------------------------
##Possibilities
###scores/metrics
# Inspect each candidate score/metric column.
# str() does not document a `data` argument, so the former `data=` values did
# not choose a data frame: every name was looked up on the search path, where
# only AllReuse is attached at this point. Each column is therefore taken
# explicitly from the data frame named in the original call.
str(AllReuse$ResolvableScoreRevised_Max)  # ResolvableScoreRevised_Max  # datasets: reuse, sharing; article  # variations: Min, previous versions
str(AllReuse$AttributionScore_IncluSelf_Revised_Max)  # AttributionScore_IncluSelf_Revised_Max  # datasets: reuse; article  # variations: Min, previous versions
str(AllReuse$Ideal_CitationScoreSimpleREVISED_Max)  # Ideal_CitationScoreSimpleREVISED_Max  # datasets: reuse, sharing (IN PROGRESS); article  # variations: YN, Min, previous versions
str(AllShare$RightPlaceYN)  # RightPlaceYN  # datasets: sharing
#str(,data=AllShare)#  RightPlaceScore   #article
str(AllShare$PercentSharedNoSim)  # PercentSharedNoSim  # variations: AdditionalUnsharedDataYN, W/Sim
###interactions
# Cross-tabulate each candidate explanatory factor against the resolvability
# score. No `data` argument is given, so the column names resolve through the
# data frame attached above.
# Four calls originally had "#?" in the middle of the formula, which commented
# out the closing parenthesis and left the call unterminated. The two pure
# placeholders are kept as TODO comments; the two calls that already named a
# real factor are closed without the leftover "Journal#?" placeholder.
xtabs(~ Journal + ResolvableScoreRevised_Max)  # Journal - specific --> question = policy (does a journal's unique data policy influence reuse/sharing?)
xtabs(~ ImpactFactor + ResolvableScoreRevised_Max)  # Journal - continuous (impact factor) --> question = does higher impact factor influence reuse/sharing quality?
xtabs(~ Discipline_Gut + ResolvableScoreRevised_Max)  # Journal - broad (discipline_gut) --> question = discipline (scientist attitude within)
xtabs(~ Ecology + ResolvableScoreRevised_Max)  # Journal - broad (discipline YN) --> question = discipline (scientist attitude within)
xtabs(~ EvoBio + ResolvableScoreRevised_Max)  # Journal - broad (discipline YN) --> question = discipline (scientist attitude within)
xtabs(~ EnvironSci + ResolvableScoreRevised_Max)  # Journal - broad (discipline YN) --> question = discipline (scientist attitude within)
xtabs(~ YearCode + ResolvableScoreRevised_Max)  # Year - specific --> question = time
# TODO: xtabs(~ <broad year variable> + ResolvableScoreRevised_Max)  # Year - broad --> question = pre vs. post policy or other critical time cutoffs (was: "YearCode#?")
xtabs(~ DepositoryAbbrvOtherSpecified + ResolvableScoreRevised_Max)  # Depository - specific --> question = which depository is "best"?
xtabs(~ DepositoryYNandNot + ResolvableScoreRevised_Max)  # Depository - broad --> question = does data deposition in depository increase citation quality?
xtabs(~ TypeOfDataset + ResolvableScoreRevised_Max)  # Data Type - specific --> question = are certain data sets better cited? GS suspected
xtabs(~ DataGenre + ResolvableScoreRevised_Max)  # Data Type - broad (genre) --> question = does the inherent type of data (unit of analysis) influence citation quality? _less confounding with depository, fewer degrees of freedom
xtabs(~ DataDefinedDiscipline + ResolvableScoreRevised_Max)  # Data Type - broad (data-defined discipline) --> question = discipline (scientist attitude within)_difficult to define each paper by its discipline, so why not use the datatype?!
xtabs(~ Open.Access + ResolvableScoreRevised_Max)  # OA - specific (sub vs. OA) --> question = does reuse/sharing (quality [for dataset]) increase [for article] with ARTICLE level open access?
xtabs(~ OA_Journal + ResolvableScoreRevised_Max)  # OA - broad (sub vs. OA) --> question = does reuse/sharing increase with JOURNAL level open access?
# TODO: xtabs(~ <citation-count variable> + ResolvableScoreRevised_Max)  # Citations - continuous (sub vs. OA) --> question = does reuse/sharing increase # citing articles? _!need to normalize this to per year average! (was: "Journal#?")
#not explored here: funder
#not explored here: author (number of, nationality, affiliation/discipline)
#Sampling artifacts:
xtabs(~ ArticleTypeReg1Oth0 + ResolvableScoreRevised_Max)  # ArticleType - binary (regular vs. special) --> question = do articles need to be "regular" articles to have data sharing/reuse or to have better quality (b/c higher word count, etc)?_could also look at effect of word count; this is more relevant when considering non-reuse and non-sharing incidents
# NOTE(review): the next two calls originally also carried "+ Journal#?"; add "+ Journal" back if a per-journal breakdown was intended.
xtabs(~ SomeSelf + ResolvableScoreRevised_Max)  # Self - code --> question = do self reuses differ from external reuses? can they be included in the dataset?
xtabs(~ Snap0Time1 + ResolvableScoreRevised_Max)  # Sampling Method - binary --> question = does random vs. continuous sampling make a difference? Can these datasets be combined?
#add:CitationPracticesCombine and/or MultipleDatasets per Article (MultipleDatasetsInSameArticle ;   TotalDatasetsInSameArticle  )

##Equation Format ----------------------------------------------------------------------------------------------------------------------------------------
#Ordinal Regression
# Generic pattern for one ordinal-regression run. Factor1, Factor2, Score and
# Dataset are stand-ins to be replaced by real names (see the per-iteration
# copy that follows in this file).
# The two guards were written as `text(Stop!)`, which is not valid R and stops
# the file from parsing; stop() raises an error at the same spot and carries
# the original instruction as its message.
ddist <- datadist(Factor1, Factor2)  # commas, not plus signs for multiple covariates
options(datadist = "ddist")
ologit <- lrm(Score ~ Factor1 + Factor2, data = Dataset, na.action = na.pass)
print(ologit)
anova(ologit)
stop("Stop! Modify to match output before running --> number of Y repeats", call. = FALSE)
# sf returns the logit of the proportion of observations at or above each
# score level; one named entry per level.
sf <- function(y) {
  c(
    "Y>=0" = qlogis(mean(y >= 0)),
    "Y>=1" = qlogis(mean(y >= 1)),
    "Y>=2" = qlogis(mean(y >= 2))
  )
}
s <- summary(Score ~ Factor1 + Factor2, fun = sf)
s
stop("Stop! Modify to match output before running --> which (# of score states), xlim (change according to min and max in \"s\" output)", call. = FALSE)
plot(s, which = 1:3, pch = 1:3, xlab = "logit", main = " ", xlim = c(-1, 1))

##Copy for each iteration  ----------------------------------------------------------------------------------------------------------------------------------------
###Score and Reuse/Share
# Copy-and-edit skeleton for one analysis. Dataset, Factor1, Factor2 and Score
# are stand-ins to be replaced by real names.
# `text(Stop!)` was not valid R and stopped the file from parsing; stop()
# raises an error at the same spot with the original instruction as message.
attach(Dataset)
str(Dataset)
xtabs(~ Factor1 + Score)
xtabs(~ Factor2 + Score)
#Ordinal Regression
ddist <- datadist(Factor1, Factor2)  # commas, not plus signs for multiple covariates
options(datadist = "ddist")
ologit <- lrm(Score ~ Factor1 + Factor2, data = Dataset, na.action = na.pass)
print(ologit)
anova(ologit)
#Diagnostics
stop("Stop! Modify to match output before running --> number of Y repeats", call. = FALSE)
# Logit of the proportion of observations at or above each score level.
sf <- function(y) {
  c(
    "Y>=0" = qlogis(mean(y >= 0)),
    "Y>=1" = qlogis(mean(y >= 1)),
    "Y>=2" = qlogis(mean(y >= 2))
  )
}
s <- summary(Score ~ Factor1 + Factor2, fun = sf)
s
stop("Stop! Modify to match output before running --> which (# of score states), xlim (change according to min and max in \"s\" output)", call. = FALSE)
plot(s, which = 1:3, pch = 1:3, xlab = "logit", main = " ", xlim = c(-1, 1))
#Output
##Tables
###Observations:
##ANOVA
###Interpretation:
##Regression
###Interpretation:
##Diagnostics
###Interpretation:
###Plot File Name:   .jpg (folder: StatsOutput-->Proportional Odds Plots)
# Redraw the template diagnostic plot (the figure to be saved under the name above).
plot(
  s,
  which = 1:3,
  pch = 1:3,
  xlab = "logit",
  main = " ",
  xlim = c(-1, 1)
)

###1.exploratory -----------------------------------------------------------------------------------------------------------------------------------------
##1a = multivar with broad factors ----------------------------------------------------------------------------------------------------------------------------------------
### -------------------------------------------------------------------------------------------------------------------------------------------------
###Resolvability and Reuse
# Resolvability score vs. broad factors, reuse data (AllReuse).
# `text(Stop!)` was not valid R and stopped the file from parsing; stop()
# raises an error at the same spot with the original instruction as message.
attach(AllReuse)
str(AllReuse)
# One two-way table per factor against the score.
xtabs(~ Journal + ResolvableScoreRevised_Max)
xtabs(~ YearCode + ResolvableScoreRevised_Max)
xtabs(~ DataDefinedDiscipline + ResolvableScoreRevised_Max)
xtabs(~ DepositoryYNandNot + ResolvableScoreRevised_Max)
#Ordinal Regression
ddist1 <- datadist(Journal, YearCode, DataDefinedDiscipline, DepositoryYNandNot)  # commas, not plus signs for multiple covariates
options(datadist = "ddist1")
ologit1 <- lrm(
  ResolvableScoreRevised_Max ~ Journal + YearCode + DataDefinedDiscipline + DepositoryYNandNot,
  data = AllReuse, na.action = na.pass
)
print(ologit1)
anova(ologit1)
#Diagnostics
stop("Stop! Modify to match output before running --> number of Y repeats", call. = FALSE)
# Logit of the proportion of observations at or above each score level.
sf1 <- function(y) {
  c(
    "Y>=0" = qlogis(mean(y >= 0)),
    "Y>=1" = qlogis(mean(y >= 1)),
    "Y>=2" = qlogis(mean(y >= 2))
  )
}
s1 <- summary(
  ResolvableScoreRevised_Max ~ Journal + YearCode + DataDefinedDiscipline + DepositoryYNandNot,
  fun = sf1
)
s1
stop("Stop! Modify to match output before running --> which, xlim", call. = FALSE)
plot(s1, which = 1:3, pch = 1:3, xlab = "logit", main = " ", xlim = c(-3.5, 4.1))
#Output
##Tables
###Observations:
#       ResolvableScoreRevised_Max
#Journal  0  1  2
#    AN   6 45  9
#    EC   4 11  2
#    GCB  9 19  1
#    ME   2  8 11
#    PB   5 16  4
#    SB   5 53 33
#            ResolvableScoreRevised_Max
#YearCode  0  1  2
#   Y2000 12 22 13
#   Y2005  2 12  6
#   Y2006  2  8  3
#   Y2007  1 14  4
#   Y2008  2 14  2
#   Y2009  2 13  7
#   Y2010 10 69 25
#                       ResolvableScoreRevised_Max
#DataDefinedDiscipline  0  1  2
#                  Eco 12 60  7
#                  Env 13 31  7
#                  Evo  6 61 46
#                    ResolvableScoreRevised_Max
#DepositoryYNandNot  0  1  2
#                 D  1 10 44
#                 N 17 84  6
#                 O 13 58 10
##ANOVA
###Interpretation:
#                Wald Statistics          Response: ResolvableScoreRevised_Max
# Factor                Chi-Square d.f. P
# Journal                7.68       5   0.1750
# YearCode              10.20       6   0.1166
# DataDefinedDiscipline  0.29       2   0.8637
# DepositoryYNandNot    57.34       2   <.0001
# TOTAL                 83.63      15   <.0001
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      2e-09     124.59         15          0      0.832      0.663      0.676      0.354       0.48      0.099
#                          Coef    S.E.   Wald Z P
#y>=1                       5.8434 0.7908  7.39  0.0000
#y>=2                       1.5529 0.6966  2.23  0.0258
#Journal=EC                -1.1977 0.7336 -1.63  0.1026
#Journal=GCB               -1.7239 0.6480 -2.66  0.0078
#Journal=ME                -0.2741 0.7381 -0.37  0.7103
#Journal=PB                -1.0064 0.6823 -1.48  0.1402
#Journal=SB                -0.1611 0.4849 -0.33  0.7397
#YearCode=Y2005            -0.2755 0.6430 -0.43  0.6684
#YearCode=Y2006            -0.4713 0.7300 -0.65  0.5185
#YearCode=Y2007             0.4218 0.6837  0.62  0.5372
#YearCode=Y2008            -0.2639 0.6705 -0.39  0.6939
#YearCode=Y2009             0.6506 0.6700  0.97  0.3315
#YearCode=Y2010             1.0403 0.4336  2.40  0.0164
#DataDefinedDiscipline=Env -0.0337 0.4831 -0.07  0.9444
#DataDefinedDiscipline=Evo -0.2718 0.5043 -0.54  0.5899
#DepositoryYNandNot=N      -3.9932 0.5329 -7.49  0.0000
#DepositoryYNandNot=O      -3.7915 0.5783 -6.56  0.0000
##Diagnostics
###Interpretation
###Plot File Name: ResolveReusePO.jpg (folder: StatsOutput-->Proportional Odds Plots)
# Redraw the s1 diagnostic plot (the figure saved under the file name above).
plot(
  s1,
  which = 1:3,
  pch = 1:3,
  xlab = "logit",
  main = " ",
  xlim = c(-3.5, 4.1)
)
#ResolvableScoreRevised_Max    N=243
#+---------------------+-----+---+----+---------+-----------+
#|                     |     |N  |Y>=0|Y>=1     |Y>=2       |
#+---------------------+-----+---+----+---------+-----------+
#|Journal              |AN   | 60|Inf |2.1972246|-1.73460106|
#|                     |EC   | 17|Inf |1.1786550|-2.01490302|
#|                     |GCB  | 29|Inf |0.7985077|-3.33220451|
#|                     |ME   | 21|Inf |2.2512918| 0.09531018|
#|                     |PB   | 25|Inf |1.3862944|-1.65822808|
#|                     |SB   | 91|Inf |2.8449094|-0.56393545|
#+---------------------+-----+---+----+---------+-----------+
#|YearCode             |Y2000| 47|Inf |1.0704414|-0.96141117|
#|                     |Y2005| 20|Inf |2.1972246|-0.84729786|
#|                     |Y2006| 13|Inf |1.7047481|-1.20397280|
#|                     |Y2007| 19|Inf |2.8903718|-1.32175584|
#|                     |Y2008| 18|Inf |2.0794415|-2.07944154|
#|                     |Y2009| 22|Inf |2.3025851|-0.76214005|
#|                     |Y2010|104|Inf |2.2407097|-1.15057203|
#+---------------------+-----+---+----+---------+-----------+
#|DataDefinedDiscipline|Eco  | 79|Inf |1.7197860|-2.33075597|
#|                     |Env  | 51|Inf |1.0726368|-1.83827948|
#|                     |Evo  |113|Inf |2.8810694|-0.37605122|
#+---------------------+-----+---+----+---------+-----------+
#|DepositoryYNandNot   |D    | 55|Inf |3.9889840| 1.38629436|
#|                     |N    |107|Inf |1.6665963|-2.82336105|
#|                     |O    | 81|Inf |1.6545583|-1.96009478|
#+---------------------+-----+---+----+---------+-----------+
#|Overall              |     |243|Inf |1.9225991|-1.11514159|
#+---------------------+-----+---+----+---------+-----------+



###1.exploratory
##1a = multivar with broad factors
###Attribution and Reuse
# Attribution score vs. broad factors, reuse data (AllReuse).
# `text(Stop!)` was not valid R and stopped the file from parsing; stop()
# raises an error at the same spot with the original instruction as message.
attach(AllReuse)
str(AllReuse)
# One two-way table per factor against the score.
xtabs(~ Journal + AttributionScore_IncluSelf_Revised_Max)
xtabs(~ YearCode + AttributionScore_IncluSelf_Revised_Max)
xtabs(~ DataDefinedDiscipline + AttributionScore_IncluSelf_Revised_Max)
xtabs(~ DepositoryYNandNot + AttributionScore_IncluSelf_Revised_Max)
#Ordinal Regression
ddist3 <- datadist(Journal, YearCode, DataDefinedDiscipline, DepositoryYNandNot)  # commas, not plus signs for multiple covariates
options(datadist = "ddist3")
ologit3 <- lrm(
  AttributionScore_IncluSelf_Revised_Max ~ Journal + YearCode + DataDefinedDiscipline + DepositoryYNandNot,
  data = AllReuse, na.action = na.pass
)
print(ologit3)
anova(ologit3)
#Diagnostics
stop("Stop! Modify to match output before running --> number of Y repeats", call. = FALSE)
# Logit of the proportion of observations at or above each score level.
sf3 <- function(y) {
  c(
    "Y>=0" = qlogis(mean(y >= 0)),
    "Y>=1" = qlogis(mean(y >= 1)),
    "Y>=2" = qlogis(mean(y >= 2))
  )
}
s3 <- summary(
  AttributionScore_IncluSelf_Revised_Max ~ Journal + YearCode + DataDefinedDiscipline + DepositoryYNandNot,
  fun = sf3
)
s3
stop("Stop! Modify to match output before running --> which, xlim", call. = FALSE)
plot(s3, which = 1:3, pch = 1:3, xlab = "logit", main = " ", xlim = c(-3.2, 1.7))
#Output
##Tables
###Observations:
#       AttributionScore_IncluSelf_Revised_Max
#Journal  0  1  2
#    AN  17 43  0
#    EC  13  4  0
#    GCB 13 16  0
#    ME   4 11  6
#    PB  11 14  0
#    SB  19 61 11
#        AttributionScore_IncluSelf_Revised_Max
#YearCode  0  1  2
#   Y2000 12 31  4
#   Y2005  6 12  2
#   Y2006  4  8  1
#   Y2007  3 16  0
#   Y2008  3 15  0
#   Y2009  7 14  1
#   Y2010 42 53  9
#                     AttributionScore_IncluSelf_Revised_Max
#DataDefinedDiscipline  0  1  2
#                  Eco 24 55  0
#                  Env 31 20  0
#                  Evo 22 74 17
#                  AttributionScore_IncluSelf_Revised_Max
#DepositoryYNandNot  0  1  2
#                 D 10 28 17
#                 N 26 81  0
#                 O 41 40  0
##ANOVA
###Interpretation:
#                Wald Statistics          Response: AttributionScore_IncluSelf_Revised_Max
#
# Factor                Chi-Square d.f. P
# Journal               11.28       5   0.0460
# YearCode               2.65       6   0.8513
# DataDefinedDiscipline  7.27       2   0.0264
# DepositoryYNandNot    17.68       2   0.0001
# TOTAL                 53.64      15   <.0001
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      2e-10      70.99         15          0      0.775      0.551      0.558      0.287       0.31      0.179
#
#                          Coef    S.E.   Wald Z P
#y>=1                       3.0813 0.6906  4.46  0.0000
#y>=2                      -1.1330 0.6351 -1.78  0.0744
#Journal=EC                -2.0642 0.7323 -2.82  0.0048
#Journal=GCB               -0.3070 0.5843 -0.53  0.5993
#Journal=ME                 0.7511 0.6377  1.18  0.2389
#Journal=PB                -0.5101 0.5864 -0.87  0.3844
#Journal=SB                 0.1980 0.4416  0.45  0.6538
#YearCode=Y2005            -0.6693 0.6267 -1.07  0.2856
#YearCode=Y2006            -0.7916 0.7153 -1.11  0.2684
#YearCode=Y2007            -0.4165 0.6206 -0.67  0.5021
#YearCode=Y2008            -0.3861 0.6528 -0.59  0.5543
#YearCode=Y2009            -0.7650 0.5974 -1.28  0.2003
#YearCode=Y2010            -0.1534 0.3954 -0.39  0.6981
#DataDefinedDiscipline=Env -1.1923 0.4443 -2.68  0.0073
#DataDefinedDiscipline=Evo -0.2252 0.4679 -0.48  0.6304
#DepositoryYNandNot=N      -1.2734 0.4850 -2.63  0.0086
#DepositoryYNandNot=O      -2.2037 0.5385 -4.09  0.0000
##Diagnostics
###Interpretation:
###Plot File Name:  AttribReuseBroad.jpg (folder: StatsOutput-->Proportional Odds Plots)
# Redraw the s3 diagnostic plot (the figure saved under the file name above).
plot(
  s3,
  which = 1:3,
  pch = 1:3,
  xlab = "logit",
  main = " ",
  xlim = c(-3.2, 1.7)
)
#AttributionScore_IncluSelf_Revised_Max    N=243
#
#+---------------------+-----+---+----+-----------+----------+
#|                     |     |N  |Y>=0|Y>=1       |Y>=2      |
#+---------------------+-----+---+----+-----------+----------+
#|Journal              |AN   | 60|Inf | 0.92798677|      -Inf|
#|                     |EC   | 17|Inf |-1.17865500|      -Inf|
#|                     |GCB  | 29|Inf | 0.20763936|      -Inf|
#|                     |ME   | 21|Inf | 1.44691898|-0.9162907|
#|                     |PB   | 25|Inf | 0.24116206|      -Inf|
#|                     |SB   | 91|Inf | 1.33222714|-1.9841314|
#+---------------------+-----+---+----+-----------+----------+
#|YearCode             |Y2000| 47|Inf | 1.07044141|-2.3749058|
#|                     |Y2005| 20|Inf | 0.84729786|-2.1972246|
#|                     |Y2006| 13|Inf | 0.81093022|-2.4849066|
#|                     |Y2007| 19|Inf | 1.67397643|      -Inf|
#|                     |Y2008| 18|Inf | 1.60943791|      -Inf|
#|                     |Y2009| 22|Inf | 0.76214005|-3.0445224|
#|                     |Y2010|104|Inf | 0.38946477|-2.3566523|
#+---------------------+-----+---+----+-----------+----------+
#|DataDefinedDiscipline|Eco  | 79|Inf | 0.82927935|      -Inf|
#|                     |Env  | 51|Inf |-0.43825493|      -Inf|
#|                     |Evo  |113|Inf | 1.41981705|-1.7311348|
#+---------------------+-----+---+----+-----------+----------+
#|DepositoryYNandNot   |D    | 55|Inf | 1.50407740|-0.8043728|
#|                     |N    |107|Inf | 1.13635262|      -Inf|
#|                     |O    | 81|Inf |-0.02469261|      -Inf|
#+---------------------+-----+---+----+-----------+----------+
#|Overall              |     |243|Inf | 0.76818237|-2.5873217|
#+---------------------+-----+---+----+-----------+----------+



###1.exploratory
##1a = multivar with broad factors
###"Ideal" and Reuse
###PlaceHolder: Ideal_CitationScoreSimpleREVISED_Max
# "Ideal" citation score vs. broad factors, reuse data (AllReuse).
# `text(Stop!)` was not valid R and stopped the file from parsing; stop()
# raises an error at the same spot with the original instruction as message.
attach(AllReuse)
str(AllReuse)
# One two-way table per factor against the score.
xtabs(~ Journal + Ideal_CitationScoreSimpleREVISED_Max)
xtabs(~ YearCode + Ideal_CitationScoreSimpleREVISED_Max)
xtabs(~ DataDefinedDiscipline + Ideal_CitationScoreSimpleREVISED_Max)
xtabs(~ DepositoryYNandNot + Ideal_CitationScoreSimpleREVISED_Max)
#Ordinal Regression
ddist4 <- datadist(Journal, YearCode, DataDefinedDiscipline, DepositoryYNandNot)  # commas, not plus signs for multiple covariates
options(datadist = "ddist4")
ologit4 <- lrm(
  Ideal_CitationScoreSimpleREVISED_Max ~ Journal + YearCode + DataDefinedDiscipline + DepositoryYNandNot,
  data = AllReuse, na.action = na.pass
)
print(ologit4)
anova(ologit4)
#Diagnostics
stop("Stop! Modify to match output before running --> number of Y repeats", call. = FALSE)
# Logit of the proportion of observations at or above each score level.
sf4 <- function(y) {
  c(
    "Y>=0" = qlogis(mean(y >= 0)),
    "Y>=1" = qlogis(mean(y >= 1)),
    "Y>=2" = qlogis(mean(y >= 2))
  )
}
s4 <- summary(
  Ideal_CitationScoreSimpleREVISED_Max ~ Journal + YearCode + DataDefinedDiscipline + DepositoryYNandNot,
  fun = sf4
)
s4
stop("Stop! Modify to match output before running --> which, xlim", call. = FALSE)
plot(s4, which = 1:3, pch = 1:3, xlab = "logit", main = " ", xlim = c(-3.5, 1.7))
#Output
##Tables
###Observations:
#       Ideal_CitationScoreSimpleREVISED_Max
#Journal  0  1  2
#    AN  51  9  0
#    EC  15  2  0
#    GCB 28  1  0
#    ME  10  5  6
#    PB  21  4  0
#    SB  57 24 10
#        Ideal_CitationScoreSimpleREVISED_Max
#YearCode  0  1  2
#   Y2000 34  9  4
#   Y2005 14  4  2
#   Y2006 10  2  1
#   Y2007 15  4  0
#   Y2008 16  2  0
#   Y2009 15  6  1
#   Y2010 78 18  8
#                     Ideal_CitationScoreSimpleREVISED_Max
#DataDefinedDiscipline  0  1  2
#                  Eco 72  7  0
#                  Env 44  7  0
#                  Evo 66 31 16
#                  Ideal_CitationScoreSimpleREVISED_Max
#DepositoryYNandNot   0   1   2
#                 D  10  29  16
#                 N 101   6   0
#                 O  71  10   0
#
##ANOVA
###Interpretation:
#                Wald Statistics          Response: Ideal_CitationScoreSimpleREVISED_Max
#
# Factor                Chi-Square d.f. P
# Journal                3.59       5   0.6096
# YearCode               6.45       6   0.3745
# DataDefinedDiscipline  1.56       2   0.4583
# DepositoryYNandNot    44.64       2   <.0001
# TOTAL                 81.40      15   <.0001
#
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      5e-12     138.43         15          0      0.906      0.813      0.827      0.327      0.573       0.09
#
#                          Coef     S.E.   Wald Z P
#y>=1                       2.39576 0.9883  2.42  0.0154
#y>=2                      -0.44499 0.9541 -0.47  0.6409
#Journal=EC                -0.27632 0.9563 -0.29  0.7726
#Journal=GCB               -1.53851 1.1794 -1.30  0.1921
#Journal=ME                 0.77450 0.7768  1.00  0.3187
#Journal=PB                -0.05386 0.8045 -0.07  0.9466
#Journal=SB                 0.15549 0.6509  0.24  0.8112
#YearCode=Y2005            -0.60639 0.8072 -0.75  0.4525
#YearCode=Y2006            -0.60745 1.0158 -0.60  0.5499
#YearCode=Y2007            -0.66562 0.9080 -0.73  0.4635
#YearCode=Y2008            -1.46526 1.0970 -1.34  0.1817
#YearCode=Y2009             0.44822 0.7506  0.60  0.5504
#YearCode=Y2010             0.60260 0.5575  1.08  0.2798
#DataDefinedDiscipline=Env  0.26653 0.6533  0.41  0.6833
#DataDefinedDiscipline=Evo -0.83733 0.8704 -0.96  0.3361
#DepositoryYNandNot=N      -5.06879 0.7738 -6.55  0.0000
#DepositoryYNandNot=O      -4.49110 0.7963 -5.64  0.0000
##Diagnostics
###Interpretation:
###Plot File Name:  IdealReuseBroad.jpg (folder: StatsOutput-->Proportional Odds Plots)
# Redraw the s4 diagnostic plot (the figure saved under the file name above).
plot(
  s4,
  which = 1:3,
  pch = 1:3,
  xlab = "logit",
  main = " ",
  xlim = c(-3.5, 1.7)
)
# Ideal_CitationScoreSimpleREVISED_Max    N=243
#
#+---------------------+-----+---+----+-----------+----------+
#|                     |     |N  |Y>=0|Y>=1       |Y>=2      |
#+---------------------+-----+---+----+-----------+----------+
#|Journal              |AN   | 60|Inf |-1.73460106|      -Inf|
#|                     |EC   | 17|Inf |-2.01490302|      -Inf|
#|                     |GCB  | 29|Inf |-3.33220451|      -Inf|
#|                     |ME   | 21|Inf | 0.09531018|-0.9162907|
#|                     |PB   | 25|Inf |-1.65822808|      -Inf|
#|                     |SB   | 91|Inf |-0.51669074|-2.0918641|
#+---------------------+-----+---+----+-----------+----------+
#|YearCode             |Y2000| 47|Inf |-0.96141117|-2.3749058|
#|                     |Y2005| 20|Inf |-0.84729786|-2.1972246|
#|                     |Y2006| 13|Inf |-1.20397280|-2.4849066|
#|                     |Y2007| 19|Inf |-1.32175584|      -Inf|
#|                     |Y2008| 18|Inf |-2.07944154|      -Inf|
#|                     |Y2009| 22|Inf |-0.76214005|-3.0445224|
#|                     |Y2010|104|Inf |-1.09861229|-2.4849066|
#+---------------------+-----+---+----+-----------+----------+
#|DataDefinedDiscipline|Eco  | 79|Inf |-2.33075597|      -Inf|
#|                     |Env  | 51|Inf |-1.83827948|      -Inf|
#|                     |Evo  |113|Inf |-0.33950714|-1.8021223|
#+---------------------+-----+---+----+-----------+----------+
#|DepositoryYNandNot   |D    | 55|Inf | 1.50407740|-0.8909729|
#|                     |N    |107|Inf |-2.82336105|      -Inf|
#|                     |O    | 81|Inf |-1.96009478|      -Inf|
#+---------------------+-----+---+----+-----------+----------+
#|Overall              |     |243|Inf |-1.09313282|-2.6523613|
#+---------------------+-----+---+----+-----------+----------+


###1.exploratory
##1a = multivar with broad factors
###Resolvability and Sharing
# Resolvability score vs. broad factors, sharing data (AllShare).
# `text(Stop!)` was not valid R and stopped the file from parsing; stop()
# raises an error at the same spot with the original instruction as message.
attach(AllShare)
str(AllShare)
# One two-way table per factor against the score.
xtabs(~ Journal + ResolvableScoreRevised)
xtabs(~ YearCode + ResolvableScoreRevised)
xtabs(~ DataDefinedDiscipline + ResolvableScoreRevised)
xtabs(~ DepositoryYNandNot + ResolvableScoreRevised)
#Ordinal Regression
ddist2 <- datadist(Journal, YearCode, DataDefinedDiscipline, DepositoryYNandNot)  # commas, not plus signs for multiple covariates
options(datadist = "ddist2")
ologit2 <- lrm(
  ResolvableScoreRevised ~ Journal + YearCode + DataDefinedDiscipline + DepositoryYNandNot,
  data = AllShare, na.action = na.pass
)
print(ologit2)
anova(ologit2)
#Diagnostics
stop("Stop! Modify to match output before running --> number of Y repeats", call. = FALSE)
# Logit of the proportion of observations at or above each score level.
sf2 <- function(y) {
  c(
    "Y>=0" = qlogis(mean(y >= 0)),
    "Y>=1" = qlogis(mean(y >= 1)),
    "Y>=2" = qlogis(mean(y >= 2))
  )
}
s2 <- summary(
  ResolvableScoreRevised ~ Journal + YearCode + DataDefinedDiscipline + DepositoryYNandNot,
  fun = sf2
)
s2
stop("Stop! Modify to match output before running --> which, xlim", call. = FALSE)
plot(s2, which = 1:3, pch = 1:3, xlab = "logit", main = " ", xlim = c(-3.2, 4.7))
#Output
##Tables
###Observations:
#       ResolvableScoreRevised
#Journal  0  1  2
#    AN   6 19  5
#    EC   0  4  1
#    GCB  0  4  2
#    ME  10  9 24
#    PB   6 17  2
#    SB   3 34 72
#        ResolvableScoreRevised
#YearCode  0  1  2
#   Y2000  8 21 25
#   Y2005  0  9 11
#   Y2006  1  4  9
#   Y2007  2  7 10
#   Y2008  1  3  5
#   Y2009  2 12  8
#   Y2010 11 31 38
#                    ResolvableScoreRevised
#DataDefinedDiscipline   0   1   2
#                  Eco  11  33   2
#                  Env   9  17   3
#                  Evo   5  37 101
#                  ResolvableScoreRevised
#DepositoryYNandNot  0  1  2
#                 D  1  5 88
#                 N  0  2  2
#                 O 24 80 16
##ANOVA
###Interpretation:
#                Wald Statistics          Response: ResolvableScoreRevised
#
# Factor                Chi-Square d.f. P
# Journal               13.03       5   0.0231
# YearCode               2.59       6   0.8578
# DataDefinedDiscipline  8.40       2   0.0150
# DepositoryYNandNot    54.91       2   <.0001
# TOTAL                 87.06      15   <.0001
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       218      4e-12     185.59         15          0      0.904      0.808      0.834       0.48       0.67       0.08
#
#                          Coef     S.E.   Wald Z P
#y>=1                       5.03106 0.8375  6.01  0.0000
#y>=2                       1.15907 0.8260  1.40  0.1605
#Journal=EC                 0.10340 1.2055  0.09  0.9316
#Journal=GCB                2.34997 1.0702  2.20  0.0281
#Journal=ME                -0.70562 0.7151 -0.99  0.3238
#Journal=PB                 0.53164 0.6848  0.78  0.4375
#Journal=SB                 0.73652 0.6173  1.19  0.2328
#YearCode=Y2005             0.06311 0.7660  0.08  0.9343
#YearCode=Y2006            -0.64537 0.8839 -0.73  0.4653
#YearCode=Y2007            -0.50653 0.7556 -0.67  0.5026
#YearCode=Y2008             0.79158 1.0673  0.74  0.4583
#YearCode=Y2009             0.26769 0.6332  0.42  0.6725
#YearCode=Y2010            -0.25738 0.4478 -0.57  0.5655
#DataDefinedDiscipline=Env  0.07955 0.5731  0.14  0.8896
#DataDefinedDiscipline=Evo  1.56198 0.5995  2.61  0.0092
#DepositoryYNandNot=N      -2.84603 1.1600 -2.45  0.0141
#DepositoryYNandNot=O      -4.29109 0.5792 -7.41  0.0000
##Diagnostics
###Interpretation:
###Plot File Name:   .jpg (folder: StatsOutput-->Proportional Odds Plots)
# Redraw the s2 diagnostic plot (no file name has been recorded for it above).
plot(
  s2,
  which = 1:3,
  pch = 1:3,
  xlab = "logit",
  main = " ",
  xlim = c(-3.2, 4.7)
)
 #ResolvableScoreRevised    N=218
#
#+---------------------+-----+---+----+---------+-----------+
#|                     |     |N  |Y>=0|Y>=1     |Y>=2       |
#+---------------------+-----+---+----+---------+-----------+
#|Journal              |AN   | 30|Inf |1.3862944|-1.60943791|
#|                     |EC   |  5|Inf |      Inf|-1.38629436|
#|                     |GCB  |  6|Inf |      Inf|-0.69314718|
#|                     |ME   | 43|Inf |1.1939225| 0.23361485|
#|                     |PB   | 25|Inf |1.1526795|-2.44234704|
#|                     |SB   |109|Inf |3.5648268| 0.66574821|
#+---------------------+-----+---+----+---------+-----------+
#|YearCode             |Y2000| 54|Inf |1.7491999|-0.14842001|
#|                     |Y2005| 20|Inf |      Inf| 0.20067070|
#|                     |Y2006| 14|Inf |2.5649494| 0.58778666|
#|                     |Y2007| 19|Inf |2.1400662| 0.10536052|
#|                     |Y2008|  9|Inf |2.0794415| 0.22314355|
#|                     |Y2009| 22|Inf |2.3025851|-0.55961579|
#|                     |Y2010| 80|Inf |1.8362112|-0.10008346|
#+---------------------+-----+---+----+---------+-----------+
#|DataDefinedDiscipline|Eco  | 46|Inf |1.1574528|-3.09104245|
#|                     |Env  | 29|Inf |0.7985077|-2.15948425|
#|                     |Evo  |143|Inf |3.3178158| 0.87745090|
#+---------------------+-----+---+----+---------+-----------+
#|DepositoryYNandNot   |D    | 94|Inf |4.5325995| 2.68557735|
#|                     |N    |  4|Inf |      Inf| 0.00000000|
#|                     |O    |120|Inf |1.3862944|-1.87180218|
#+---------------------+-----+---+----+---------+-----------+
#|Overall              |     |218|Inf |2.0438144|-0.05505978|
#+---------------------+-----+---+----+---------+-----------+


###1.exploratory
##1a = multivar with broad factors
###Right Place and Sharing
# Right-place indicator (two levels, 0/1) vs. broad factors, sharing data
# (AllShare).
# `text(Stop!)` was not valid R and stopped the file from parsing; stop()
# raises an error at the same spot with the original instruction as message.
# NOTE(review): the output recorded below this block shows C = 1, R2 = 0.996
# and standard errors in the hundreds, and the DepositoryYNandNot table splits
# the outcome almost perfectly. That looks like (quasi-)complete separation,
# so the Wald tests are probably not informative -- confirm before interpreting.
attach(AllShare)
str(AllShare)
# One two-way table per factor against the indicator.
xtabs(~ Journal + RightPlaceYN)
xtabs(~ YearCode + RightPlaceYN)
xtabs(~ DataDefinedDiscipline + RightPlaceYN)
xtabs(~ DepositoryYNandNot + RightPlaceYN)
#Ordinal Regression
ddist5 <- datadist(Journal, YearCode, DataDefinedDiscipline, DepositoryYNandNot)  # commas, not plus signs for multiple covariates
options(datadist = "ddist5")
ologit5 <- lrm(
  RightPlaceYN ~ Journal + YearCode + DataDefinedDiscipline + DepositoryYNandNot,
  data = AllShare, na.action = na.pass
)
print(ologit5)
anova(ologit5)
#Diagnostics
stop("Stop! Modify to match output before running --> number of Y repeats", call. = FALSE)
# Logit of the proportion of observations at or above each level (two levels here).
sf5 <- function(y) {
  c(
    "Y>=0" = qlogis(mean(y >= 0)),
    "Y>=1" = qlogis(mean(y >= 1))
  )
}
s5 <- summary(
  RightPlaceYN ~ Journal + YearCode + DataDefinedDiscipline + DepositoryYNandNot,
  fun = sf5
)
s5
#Output
##Tables
###Observations:
#       RightPlaceYN
#Journal  0  1
#    AN  26  4
#    EC   3  2
#    GCB  6  0
#    ME  20 23
#    PB  25  0
#    SB  45 64
#        RightPlaceYN
#YearCode  0  1
#   Y2000 36 18
#   Y2005  9 11
#   Y2006  5  9
#   Y2007  9 10
#   Y2008  5  4
#   Y2009 16  6
#   Y2010 45 35
#                     RightPlaceYN
#DataDefinedDiscipline  0  1
#                  Eco 45  1
#                  Env 29  0
#                  Evo 51 92
#                  RightPlaceYN
#DepositoryYNandNot   0   1
#                 D   1  93
#                 N   4   0
#                 O 120   0
##ANOVA
###Interpretation:
#                Wald Statistics          Response: RightPlaceYN
#
# Factor                Chi-Square d.f. P
# Journal               0.07        5   0.9999
# YearCode              0.03        6   1.0000
# DataDefinedDiscipline 0.00        2   0.9991
# DepositoryYNandNot    0.14        2   0.9305
# TOTAL                 0.21       15   1.0000
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       218      8e-04     294.72         15          0          1          1          1      0.491      0.996      0.002
#
#                          Coef     S.E.   Wald Z P
#Intercept                   6.9392 139.79  0.05  0.9604
#Journal=EC                 11.5611 165.38  0.07  0.9443
#Journal=GCB                13.0956 198.26  0.07  0.9473
#Journal=ME                 12.3347 103.46  0.12  0.9051
#Journal=PB                 11.0508 148.16  0.07  0.9405
#Journal=SB                 10.4975  41.00  0.26  0.7979
#YearCode=Y2005              0.3143 154.32  0.00  0.9984
#YearCode=Y2006              6.2772 109.65  0.06  0.9543
#YearCode=Y2007              3.0449 391.76  0.01  0.9938
#YearCode=Y2008              7.4098 127.59  0.06  0.9537
#YearCode=Y2009              2.0024 134.83  0.01  0.9882
#YearCode=Y2010             -1.8571 101.16 -0.02  0.9854
#DataDefinedDiscipline=Env  -3.5606 120.76 -0.03  0.9765
#DataDefinedDiscipline=Evo  -5.0820 123.90 -0.04  0.9673
#DepositoryYNandNot=N      -27.6655 258.34 -0.11  0.9147
#DepositoryYNandNot=O      -28.9925  79.91 -0.36  0.7168
#
##Diagnostics
###Interpretation:
###Plot File Name: not needed for only 2 levels
#RightPlaceYN    N=218
#
#+---------------------+-----+---+----+----------+
#|                     |     |N  |Y>=0|Y>=1      |
#+---------------------+-----+---+----+----------+
#|Journal              |AN   | 30|Inf |-1.8718022|
#|                     |EC   |  5|Inf |-0.4054651|
#|                     |GCB  |  6|Inf |      -Inf|
#|                     |ME   | 43|Inf | 0.1397619|
#|                     |PB   | 25|Inf |      -Inf|
#|                     |SB   |109|Inf | 0.3522206|
#+---------------------+-----+---+----+----------+
#|YearCode             |Y2000| 54|Inf |-0.6931472|
#|                     |Y2005| 20|Inf | 0.2006707|
#|                     |Y2006| 14|Inf | 0.5877867|
#|                     |Y2007| 19|Inf | 0.1053605|
#|                     |Y2008|  9|Inf |-0.2231436|
#|                     |Y2009| 22|Inf |-0.9808293|
#|                     |Y2010| 80|Inf |-0.2513144|
#+---------------------+-----+---+----+----------+
#|DataDefinedDiscipline|Eco  | 46|Inf |-3.8066625|
#|                     |Env  | 29|Inf |      -Inf|
#|                     |Evo  |143|Inf | 0.5899629|
#+---------------------+-----+---+----+----------+
#|DepositoryYNandNot   |D    | 94|Inf | 4.5325995|
#|                     |N    |  4|Inf |      -Inf|
#|                     |O    |120|Inf |      -Inf|
#+---------------------+-----+---+----+----------+
#|Overall              |     |218|Inf |-0.2957142|
#+---------------------+-----+---+----+----------+


###1.exploratory
##1a = multivar with broad factors
###Resolvability and Sharing
###Placeholder:PercentSharedNoSim OR AdditionalUnsharedDataYN


###1.exploratory----------------------------------------------------------------------------------------------------------------------------------------
##1b = univariate of each individually with specific factors-------------------------------------------------------------------------------------------
###----------------------------------------------------------------------------------------------------------------------------------------------------
###6:Resolve and Journal
#Attach the reuse data set so the bare column names below resolve against it
attach(AllReuse)
str(AllReuse)
#Journal - specific --> question = policy (does a journal's unique data policy influence reuse/sharing?)
xtabs(~ Journal + ResolvableScoreRevised_Max)
#Ordinal Regression
ddist6 <- datadist(Journal) #commas, not plus signs for multiple covariates
options(datadist = 'ddist6')
ologit6 <- lrm(ResolvableScoreRevised_Max ~ Journal, data = AllReuse, na.action = na.pass)
print(ologit6)
anova(ologit6)
#Diagnostics
#Manual checkpoint: halts a sourced/batch run here on purpose.
#modify to match output before running--> Number of Y repeats
stop("Stop! Modify sf6 to match the number of Y levels in the output before running.", call. = FALSE)
#sf6: logit (qlogis) of the proportion of observations at or above each response level
sf6 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
#Apply sf6 within each Journal level (uses the attached AllReuse columns)
s6 <- summary(ResolvableScoreRevised_Max ~ Journal, fun = sf6)
s6
#Manual checkpoint: modify to match output before running -->which, xlim
stop("Stop! Adjust 'which' and 'xlim' in the plot call to match the output before running.", call. = FALSE)
plot(s6, which = 1:3, pch = 1:3, xlab = 'logit', main = ' ', xlim = c(-3.5, 3.0))
#Output
##Tables
###Observations:
#       ResolvableScoreRevised_Max
#Journal  0  1  2
#    AN   6 45  9
#    EC   4 11  2
#    GCB  9 19  1
#    ME   2  8 11
#    PB   5 16  4
#    SB   5 53 33
##ANOVA
###Interpretation:
#                Wald Statistics          Response: ResolvableScoreRevised_Max
# Factor     Chi-Square d.f. P
# Journal    31.94      5    <.0001
# TOTAL      31.94      5    <.0001
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
 #      243      3e-07      35.42          5          0      0.692      0.384      0.493      0.205      0.162      0.104
#              Coef    S.E.   Wald Z P
#y>=1         1.9175 0.3119  6.15  0.0000
#y>=2        -1.5397 0.2936 -5.24  0.0000
#Journal=EC  -0.6536 0.5877 -1.11  0.2661
#Journal=GCB -1.2042 0.4723 -2.55  0.0108
#Journal=ME   1.5012 0.5298  2.83  0.0046
#Journal=PB  -0.3503 0.5145 -0.68  0.4960
#Journal=SB   0.9695 0.3518  2.76  0.0059
##Diagnostics
###Interpretation:
###Plot File Name:   ResolveJournalSpecific.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Re-draw the s6 logit plot for the saved figure. xlim now matches the range used for
#the same plot in the diagnostics step (-3.5 to 3.0); the previous c(-1,1) cut off most
#of the tabulated logits below (roughly -3.33 to 2.84).
plot(s6, which = 1:3, pch = 1:3, xlab = 'logit', main = ' ', xlim = c(-3.5, 3.0))
#ResolvableScoreRevised_Max    N=243
#+-------+---+---+----+---------+-----------+
#|       |   |N  |Y>=0|Y>=1     |Y>=2       |
#+-------+---+---+----+---------+-----------+
#|Journal|AN | 60|Inf |2.1972246|-1.73460106|
#|       |EC | 17|Inf |1.1786550|-2.01490302|
#|       |GCB| 29|Inf |0.7985077|-3.33220451|
#|       |ME | 21|Inf |2.2512918| 0.09531018|
#|       |PB | 25|Inf |1.3862944|-1.65822808|
#|       |SB | 91|Inf |2.8449094|-0.56393545|
#+-------+---+---+----+---------+-----------+
#|Overall|   |243|Inf |1.9225991|-1.11514159|
#+-------+---+---+----+---------+-----------+


##1b = univariate of each individually with specific factors
###7:Resolve and Year (code, not sequential--->TRY sequential with NumCode or straight years)
#Attach the reuse data set so the bare column names below resolve against it
attach(AllReuse)
str(AllReuse)
#Year - specific    --> question = time
xtabs(~ YearCode + ResolvableScoreRevised_Max)
#Ordinal Regression
ddist7 <- datadist(YearCode) #commas, not plus signs for multiple covariates
options(datadist = 'ddist7')
ologit7 <- lrm(ResolvableScoreRevised_Max ~ YearCode, data = AllReuse, na.action = na.pass)
print(ologit7)
anova(ologit7)
#Diagnostics
#Manual checkpoint: halts a sourced/batch run here on purpose.
#modify to match output before running--> Number of Y repeats
stop("Stop! Modify sf7 to match the number of Y levels in the output before running.", call. = FALSE)
#sf7: logit (qlogis) of the proportion of observations at or above each response level
sf7 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
#Apply sf7 within each YearCode level (uses the attached AllReuse columns)
s7 <- summary(ResolvableScoreRevised_Max ~ YearCode, fun = sf7)
s7
#Manual checkpoint: modify to match output before running -->which, xlim
stop("Stop! Adjust 'which' and 'xlim' in the plot call to match the output before running.", call. = FALSE)
plot(s7, which = 1:3, pch = 1:3, xlab = 'logit', main = ' ', xlim = c(-2.2, 3.0))
#Output
##Tables
###Observations:
#        ResolvableScoreRevised_Max
#YearCode  0  1  2
#   Y2000 12 22 13
#   Y2005  2 12  6
#   Y2006  2  8  3
#   Y2007  1 14  4
#   Y2008  2 14  2
#   Y2009  2 13  7
#   Y2010 10 69 25
##ANOVA
###Interpretation:
#                Wald Statistics          Response: ResolvableScoreRevised_Max
# Factor     Chi-Square d.f. P
# YearCode   3.14       6    0.7908
# TOTAL      3.14       6    0.7908
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      2e-07       3.17          6     0.7866      0.557      0.113      0.149       0.06      0.016      0.109
 #               Coef     S.E.   Wald Z P
#y>=1            1.61763 0.3362  4.81  0.0000
#y>=2           -1.45409 0.3314 -4.39  0.0000
#YearCode=Y2005  0.60002 0.5489  1.09  0.2743
#YearCode=Y2006  0.18557 0.6498  0.29  0.7752
#YearCode=Y2007  0.42167 0.5477  0.77  0.4414
#YearCode=Y2008 -0.08177 0.5601 -0.15  0.8839
#YearCode=Y2009  0.69039 0.5308  1.30  0.1933
#YearCode=Y2010  0.39917 0.3723  1.07  0.2837
##Diagnostics
###Interpretation:
###Plot File Name:   ResolveYearSpecificCode.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Re-draw the s7 logit plot for the saved figure (summary columns 1-3, plotting symbols 1-3)
plot(
  s7,
  which = seq_len(3),
  pch = seq_len(3),
  xlab = 'logit',
  main = ' ',
  xlim = c(-2.2, 3.0)
)
#ResolvableScoreRevised_Max    N=243
#+--------+-----+---+----+--------+----------+
#|        |     |N  |Y>=0|Y>=1    |Y>=2      |
#+--------+-----+---+----+--------+----------+
#|YearCode|Y2000| 47|Inf |1.070441|-0.9614112|
#|        |Y2005| 20|Inf |2.197225|-0.8472979|
#|        |Y2006| 13|Inf |1.704748|-1.2039728|
#|        |Y2007| 19|Inf |2.890372|-1.3217558|
#|        |Y2008| 18|Inf |2.079442|-2.0794415|
#|        |Y2009| 22|Inf |2.302585|-0.7621401|
#|        |Y2010|104|Inf |2.240710|-1.1505720|
#+--------+-----+---+----+--------+----------+
#|Overall |     |243|Inf |1.922599|-1.1151416|
#+--------+-----+---+----+--------+----------+


###1.exploratory
##1b = univariate of each individually with specific factors
###8:Resolve and Depository
#Attach the reuse data set so the bare column names below resolve against it
attach(AllReuse)
str(AllReuse)
#Depository - specific  --> question = which depository is "best"?
xtabs(~ DepositoryAbbrvOtherSpecified + ResolvableScoreRevised_Max)
#Ordinal Regression
ddist8 <- datadist(DepositoryAbbrvOtherSpecified) #commas, not plus signs for multiple covariates
options(datadist = 'ddist8')
ologit8 <- lrm(ResolvableScoreRevised_Max ~ DepositoryAbbrvOtherSpecified, data = AllReuse, na.action = na.pass)
print(ologit8)
anova(ologit8)
#Diagnostics
#Manual checkpoint: halts a sourced/batch run here on purpose.
#modify to match output before running--> Number of Y repeats
stop("Stop! Modify sf8 to match the number of Y levels in the output before running.", call. = FALSE)
#sf8: logit (qlogis) of the proportion of observations at or above each response level
sf8 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
#Apply sf8 within each depository level (uses the attached AllReuse columns)
s8 <- summary(ResolvableScoreRevised_Max ~ DepositoryAbbrvOtherSpecified, fun = sf8)
s8
#Manual checkpoint: modify to match output before running -->which, xlim
stop("Stop! Adjust 'which' and 'xlim' in the plot call to match the output before running.", call. = FALSE)
plot(s8, which = 1:3, pch = 1:3, xlab = 'logit', main = ' ', xlim = c(-2.8, 2.1))
#Output
##Tables
###Observations:
#                             ResolvableScoreRevised_Max
#DepositoryAbbrvOtherSpecified  0  1  2
#                           DB  1  6  1
#                           E   5 20  3
#                           G   0  9 40
#                           O  25 92  8
#                           T   0  1  4
#                           U   0 24  4
##ANOVA
###Interpretation:
#                Wald Statistics          Response: ResolvableScoreRevised_Max
#
# Factor                        Chi-Square d.f. P
# DepositoryAbbrvOtherSpecified 81.7       5    <.0001
# TOTAL                         81.7       5    <.0001
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      6e-09     121.54          5          0      0.791      0.582      0.779      0.311      0.471      0.103
#                                Coef    S.E.   Wald Z P
#y>=1                             2.1075 0.8758  2.41  0.0161
#y>=2                            -2.1075 0.8758 -2.41  0.0161
#DepositoryAbbrvOtherSpecified=E -0.3784 0.9742 -0.39  0.6977
#DepositoryAbbrvOtherSpecified=G  3.6032 0.9500  3.79  0.0001
#DepositoryAbbrvOtherSpecified=O -0.6868 0.8885 -0.77  0.4395
#DepositoryAbbrvOtherSpecified=T  3.4984 1.4193  2.46  0.0137
#DepositoryAbbrvOtherSpecified=U  0.6755 0.9610  0.70  0.4821
##Diagnostics
###Interpretation:
###Plot File Name:  ResolveDepositorySpecified.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Re-draw the s8 logit plot for the saved figure (summary columns 1-3, plotting symbols 1-3)
plot(
  s8,
  which = seq_len(3),
  pch = seq_len(3),
  xlab = 'logit',
  main = ' ',
  xlim = c(-2.8, 2.1)
)
#ResolvableScoreRevised_Max    N=243
#+-----------------------------+--+---+----+--------+---------+
#|                             |  |N  |Y>=0|Y>=1    |Y>=2     |
#+-----------------------------+--+---+----+--------+---------+
#|DepositoryAbbrvOtherSpecified|DB|  8|Inf |1.945910|-1.945910|
#|                             |E | 28|Inf |1.526056|-2.120264|
#|                             |G | 49|Inf |     Inf| 1.491655|
#|                             |O |125|Inf |1.386294|-2.682732|
#|                             |T |  5|Inf |     Inf| 1.386294|
#|                             |U | 28|Inf |     Inf|-1.791759|
#+-----------------------------+--+---+----+--------+---------+
#|Overall                      |  |243|Inf |1.922599|-1.115142|
#+-----------------------------+--+---+----+--------+---------+


###1.exploratory
##1b = univariate of each individually with specific factors
###9:Resolve and Dataset Type (original, as extracted)
#Attach the reuse data set so the bare column names below resolve against it
attach(AllReuse)
str(AllReuse)
#Data Type -specific  --> question =  are certain data sets better cited? GS suspected
xtabs(~ TypeOfDataset + ResolvableScoreRevised_Max)
#Ordinal Regression
ddist9 <- datadist(TypeOfDataset) #commas, not plus signs for multiple covariates
options(datadist = 'ddist9')
ologit9 <- lrm(ResolvableScoreRevised_Max ~ TypeOfDataset, data = AllReuse, na.action = na.pass)
print(ologit9)
anova(ologit9)
#Diagnostics
#Manual checkpoint: halts a sourced/batch run here on purpose.
#modify to match output before running--> Number of Y repeats
stop("Stop! Modify sf9 to match the number of Y levels in the output before running.", call. = FALSE)
#sf9: logit (qlogis) of the proportion of observations at or above each response level
sf9 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
#Apply sf9 within each TypeOfDataset level (uses the attached AllReuse columns)
s9 <- summary(ResolvableScoreRevised_Max ~ TypeOfDataset, fun = sf9)
s9
#Manual checkpoint: modify to match output before running -->which, xlim
stop("Stop! Adjust 'which' and 'xlim' in the plot call to match the output before running.", call. = FALSE)
plot(s9, which = 1:3, pch = 1:3, xlab = 'logit', main = ' ', xlim = c(-2.9, 3.2))
#Output
##Tables
###Observations:
#             ResolvableScoreRevised_Max
#TypeOfDataset  0  1  2
#          Bio  7 37  3
#          EA   9 21  2
#          Eco  2 11  2
#          GA   0  4  2
#          GIS  3  9  2
#          GO   1  4  0
#          GS   4 40 41
#          PA   3 12  2
#          PT   1 13  3
#          XY   1  1  3
##ANOVA
###Interpretation:
#                Wald Statistics          Response: ResolvableScoreRevised_Max
# Factor        Chi-Square d.f. P
# TypeOfDataset 44.02      9    <.0001
# TOTAL         44.02      9    <.0001
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      2e-12      52.39          9          0      0.727      0.455      0.555      0.243      0.232      0.106
#                   Coef     S.E.   Wald Z P
#y>=1               1.49082 0.3285  4.54  0.0000
#y>=2              -2.17324 0.3625 -6.00  0.0000
#TypeOfDataset=EA  -0.54934 0.4851 -1.13  0.2575
#TypeOfDataset=Eco  0.34121 0.6499  0.53  0.5996
#TypeOfDataset=GA   1.60711 0.8750  1.84  0.0663
#TypeOfDataset=GIS  0.02910 0.6742  0.04  0.9656
#TypeOfDataset=GO  -0.42170 0.9621 -0.44  0.6612
#TypeOfDataset=GS   2.06086 0.4154  4.96  0.0000
#TypeOfDataset=PA   0.09282 0.6190  0.15  0.8808
#TypeOfDataset=PT   0.81409 0.6106  1.33  0.1824
#TypeOfDataset=XY   2.22095 1.0406  2.13  0.0328
##Diagnostics
###Interpretation:
###Plot File Name:   ResolveDatatypeSpecific.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Re-draw the s9 logit plot for the saved figure. xlim now matches the range used for
#the same plot in the diagnostics step (-2.9 to 3.2); the previous c(-1,1) cut off most
#of the tabulated logits below (roughly -2.71 to 3.01).
plot(s9, which = 1:3, pch = 1:3, xlab = 'logit', main = ' ', xlim = c(-2.9, 3.2))
#ResolvableScoreRevised_Max    N=243
#+-------------+---+---+----+---------+-----------+
#|             |   |N  |Y>=0|Y>=1     |Y>=2       |
#+-------------+---+---+----+---------+-----------+
#|TypeOfDataset|Bio| 47|Inf |1.7429693|-2.68557735|
#|             |EA | 32|Inf |0.9382696|-2.70805020|
#|             |Eco| 15|Inf |1.8718022|-1.87180218|
#|             |GA |  6|Inf |      Inf|-0.69314718|
#|             |GIS| 14|Inf |1.2992830|-1.79175947|
#|             |GO |  5|Inf |1.3862944|       -Inf|
#|             |GS | 85|Inf |3.0081548|-0.07061757|
#|             |PA | 17|Inf |1.5404450|-2.01490302|
#|             |PT | 17|Inf |2.7725887|-1.54044504|
#|             |XY |  5|Inf |1.3862944| 0.40546511|
#+-------------+---+---+----+---------+-----------+
#|Overall      |   |243|Inf |1.9225991|-1.11514159|
#+-------------+---+---+----+---------+-----------+


###1.exploratory
##1b = univariate of each individually with specific factors
###10:Resolve and Open Access
#Attach the reuse data set so the bare column names below resolve against it
attach(AllReuse)
str(AllReuse)
#OA - specific (sub vs. OA)   --> question =  does reuse/sharing quality increase with ARTICLE level open access?
xtabs(~ Open.Access + ResolvableScoreRevised_Max)
#Ordinal Regression
ddist10 <- datadist(Open.Access) #commas, not plus signs for multiple covariates
options(datadist = 'ddist10')
ologit10 <- lrm(ResolvableScoreRevised_Max ~ Open.Access, data = AllReuse, na.action = na.pass)
print(ologit10)
anova(ologit10)
#Diagnostics
#Manual checkpoint: halts a sourced/batch run here on purpose.
#modify to match output before running--> Number of Y repeats
stop("Stop! Modify sf10 to match the number of Y levels in the output before running.", call. = FALSE)
#sf10: logit (qlogis) of the proportion of observations at or above each response level
sf10 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
#Apply sf10 within each Open.Access level (uses the attached AllReuse columns)
s10 <- summary(ResolvableScoreRevised_Max ~ Open.Access, fun = sf10)
s10
#Manual checkpoint: modify to match output before running -->which, xlim
stop("Stop! Adjust 'which' and 'xlim' in the plot call to match the output before running.", call. = FALSE)
plot(s10, which = 1:3, pch = 1:3, xlab = 'logit', main = ' ', xlim = c(-1.5, 2.8))
#Output
##Tables
###Observations:
#           ResolvableScoreRevised_Max
#Open.Access   0   1   2
#          0  30 141  57
#          1   1  11   3
##ANOVA
###Interpretation:
#                Wald Statistics          Response: ResolvableScoreRevised_Max
# Factor      Chi-Square d.f. P
# Open.Access 0          1    0.9692
# TOTAL       0          1    0.9692
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      7e-10          0          1     0.9692      0.501      0.001      0.011      0.001          0      0.111
#             Coef     S.E.   Wald Z P
#y>=1         1.92128 0.1953  9.84  0.0000
#y>=2        -1.11647 0.1527 -7.31  0.0000
#Open.Access  0.02012 0.5210  0.04  0.9692
##Diagnostics
###Interpretation:
###Plot File Name:   ReuseResolveOA.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Re-draw the s10 logit plot for the saved figure (summary columns 1-3, plotting symbols 1-3)
plot(
  s10,
  which = seq_len(3),
  pch = seq_len(3),
  xlab = 'logit',
  main = ' ',
  xlim = c(-1.5, 2.8)
)
#+-----------+---+---+----+--------+---------+
#|           |   |N  |Y>=0|Y>=1    |Y>=2     |
#+-----------+---+---+----+--------+---------+
#|Open.Access|No |228|Inf |1.887070|-1.098612|
#|           |Yes| 15|Inf |2.639057|-1.386294|
#+-----------+---+---+----+--------+---------+
#|Overall    |   |243|Inf |1.922599|-1.115142|
#+-----------+---+---+----+--------+---------+


#TODO(section 11): placeholder only -- the covariate for citing-article counts is still undecided (the "?").
#Kept commented out: as live code the inline "#?" hid the closing parenthesis, leaving an
#unterminated xtabs( call that broke the statements following it.
#xtabs(~ Journal#?+ResolvableScoreRevised_Max)#Citations -    --> question =  does reuse/sharing increase # citing articles? _!need to normalize this to per year average!
###1.exploratory
##1b = univariate of each individually with specific factors
###11:Resolve and Num. citing Articles

#attribution----------------------------------------------------------------------------------------------------------------------------------------------
###1.exploratory
##1b = univariate of each individually with specific factors
###16:Attribution and Journal
#Attach the reuse data set so the bare column names below resolve against it
attach(AllReuse)
str(AllReuse)
#Journal - specific --> question = policy (does a journal's unique data policy influence reuse/sharing?)
xtabs(~ Journal + AttributionScore_IncluSelf_Revised_Max)
#Ordinal Regression
ddist16 <- datadist(Journal) #commas, not plus signs for multiple covariates
options(datadist = 'ddist16')
ologit16 <- lrm(AttributionScore_IncluSelf_Revised_Max ~ Journal, data = AllReuse, na.action = na.pass)
print(ologit16)
anova(ologit16)
#Diagnostics
#Manual checkpoint: halts a sourced/batch run here on purpose.
#modify to match output before running--> Number of Y repeats
stop("Stop! Modify sf16 to match the number of Y levels in the output before running.", call. = FALSE)
#sf16: logit (qlogis) of the proportion of observations at or above each response level
sf16 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
#Apply sf16 within each Journal level (uses the attached AllReuse columns)
s16 <- summary(AttributionScore_IncluSelf_Revised_Max ~ Journal, fun = sf16)
s16
#Manual checkpoint: modify to match output before running -->which, xlim
stop("Stop! Adjust 'which' and 'xlim' in the plot call to match the output before running.", call. = FALSE)
#Output
##Tables
###Observations:
#       AttributionScore_IncluSelf_Revised_Max
#Journal  0  1  2
#    AN  17 43  0
#    EC  13  4  0
#    GCB 13 16  0
#    ME   4 11  6
#    PB  11 14  0
#    SB  19 61 11
##ANOVA
###Interpretation:
#                Wald Statistics          Response: AttributionScore_IncluSelf_Revised_Max
# Factor     Chi-Square d.f. P
# Journal    31.32      5    <.0001
# TOTAL      31.32      5    <.0001
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      4e-08      36.54          5          0      0.694      0.388      0.495      0.202      0.171      0.194
#            Coef    S.E.   Wald Z P
#y>=1         0.7591 0.2608  2.91  0.0036
#y>=2        -2.9541 0.3640 -8.12  0.0000
#Journal=EC  -1.9474 0.6271 -3.11  0.0019
#Journal=GCB -0.6126 0.4448 -1.38  0.1685
#Journal=ME   1.5510 0.5900  2.63  0.0086
#Journal=PB  -0.5820 0.4677 -1.24  0.2133
#Journal=SB   0.7135 0.3529  2.02  0.0432
##Diagnostics
###Interpretation:
###Plot File Name:   AttribReuseJournal.jpg (folder: StatsOutput-->Proportional Odds Plots)
#plot(s16, which=1:3, pch=1:3, xlab='logit', main=' ', xlim=c(-2.7,1.6))
#AttributionScore_IncluSelf_Revised_Max    N=243
#+-------+---+---+----+----------+----------+
#|       |   |N  |Y>=0|Y>=1      |Y>=2      |
#+-------+---+---+----+----------+----------+
#|Journal|AN | 60|Inf | 0.9279868|      -Inf|
#|       |EC | 17|Inf |-1.1786550|      -Inf|
#|       |GCB| 29|Inf | 0.2076394|      -Inf|
#|       |ME | 21|Inf | 1.4469190|-0.9162907|
#|       |PB | 25|Inf | 0.2411621|      -Inf|
#|       |SB | 91|Inf | 1.3322271|-1.9841314|
#+-------+---+---+----+----------+----------+
#|Overall|   |243|Inf | 0.7681824|-2.5873217|
#+-------+---+---+----+----------+----------+

##1b = univariate of each individually with specific factors
###17:Attrib and Year (code, not sequential--->TRY sequential with NumCode or straight years)
#Attach the reuse data set so the bare column names below resolve against it
attach(AllReuse)
str(AllReuse)
#Year - specific    --> question = time
xtabs(~ YearCode + AttributionScore_IncluSelf_Revised_Max)
#Ordinal Regression
ddist17 <- datadist(YearCode) #commas, not plus signs for multiple covariates
options(datadist = 'ddist17')
ologit17 <- lrm(AttributionScore_IncluSelf_Revised_Max ~ YearCode, data = AllReuse, na.action = na.pass)
print(ologit17)
anova(ologit17)
#Diagnostics
#Manual checkpoint: halts a sourced/batch run here on purpose.
#modify to match output before running--> Number of Y repeats
stop("Stop! Modify sf17 to match the number of Y levels in the output before running.", call. = FALSE)
#sf17: logit (qlogis) of the proportion of observations at or above each response level
sf17 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
#Apply sf17 within each YearCode level (uses the attached AllReuse columns)
s17 <- summary(AttributionScore_IncluSelf_Revised_Max ~ YearCode, fun = sf17)
s17
#Manual checkpoint: modify to match output before running -->which, xlim
stop("Stop! Adjust 'which' and 'xlim' in the plot call to match the output before running.", call. = FALSE)
#Output
##Tables
###Observations:
#        AttributionScore_IncluSelf_Revised_Max
#YearCode  0  1  2
#   Y2000 12 31  4
#   Y2005  6 12  2
#   Y2006  4  8  1
#   Y2007  3 16  0
#   Y2008  3 15  0
#   Y2009  7 14  1
#   Y2010 42 53  9
##ANOVA
###Interpretation:
#                Wald Statistics          Response: AttributionScore_IncluSelf_Revised_Max
# Factor     Chi-Square d.f. P
# YearCode   4.38       6    0.6258
# TOTAL      4.38       6    0.6258
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      4e-07       4.44          6     0.6174      0.571      0.141      0.191      0.074      0.022       0.21
#               Coef     S.E.   Wald Z P
#y>=1            1.05950 0.3088  3.43  0.0006
#y>=2           -2.34244 0.3613 -6.48  0.0000
#YearCode=Y2005 -0.12891 0.5528 -0.23  0.8156
#YearCode=Y2006 -0.22789 0.6407 -0.36  0.7221
#YearCode=Y2007  0.10007 0.5453  0.18  0.8544
#YearCode=Y2008  0.06893 0.5554  0.12  0.9012
#YearCode=Y2009 -0.35496 0.5216 -0.68  0.4961
#YearCode=Y2010 -0.57372 0.3625 -1.58  0.1135
##Diagnostics
###Interpretation:
###Plot File Name:   AttribReuseYearCode.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Draw the s17 logit plot for the saved figure (summary columns 1-3, plotting symbols 1-3)
plot(
  s17,
  which = seq_len(3),
  pch = seq_len(3),
  xlab = 'logit',
  main = ' ',
  xlim = c(-3.2, 1.8)
)
#AttributionScore_IncluSelf_Revised_Max    N=243
#+--------+-----+---+----+---------+---------+
#|        |     |N  |Y>=0|Y>=1     |Y>=2     |
#+--------+-----+---+----+---------+---------+
#|YearCode|Y2000| 47|Inf |1.0704414|-2.374906|
#|        |Y2005| 20|Inf |0.8472979|-2.197225|
#|        |Y2006| 13|Inf |0.8109302|-2.484907|
#|        |Y2007| 19|Inf |1.6739764|     -Inf|
#|        |Y2008| 18|Inf |1.6094379|     -Inf|
#|        |Y2009| 22|Inf |0.7621401|-3.044522|
#|        |Y2010|104|Inf |0.3894648|-2.356652|
#+--------+-----+---+----+---------+---------+
#|Overall |     |243|Inf |0.7681824|-2.587322|
#+--------+-----+---+----+---------+---------+


###1.exploratory
##1b = univariate of each individually with specific factors
###18:Attrib and Depository
#Attach the reuse data set so the bare column names below resolve against it
attach(AllReuse)
str(AllReuse)
#Depository - specific  --> question = which depository is "best"?
xtabs(~ DepositoryAbbrvOtherSpecified + AttributionScore_IncluSelf_Revised_Max)
#Ordinal Regression
ddist18 <- datadist(DepositoryAbbrvOtherSpecified) #commas, not plus signs for multiple covariates
options(datadist = 'ddist18')
ologit18 <- lrm(AttributionScore_IncluSelf_Revised_Max ~ DepositoryAbbrvOtherSpecified, data = AllReuse, na.action = na.pass)
print(ologit18)
anova(ologit18)
#Diagnostics
#Manual checkpoint: halts a sourced/batch run here on purpose.
#modify to match output before running--> Number of Y repeats
stop("Stop! Modify sf18 to match the number of Y levels in the output before running.", call. = FALSE)
#sf18: logit (qlogis) of the proportion of observations at or above each response level
sf18 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
#Apply sf18 within each depository level (uses the attached AllReuse columns)
s18 <- summary(AttributionScore_IncluSelf_Revised_Max ~ DepositoryAbbrvOtherSpecified, fun = sf18)
s18
#Manual checkpoint: modify to match output before running -->which, xlim
stop("Stop! Adjust 'which' and 'xlim' in the plot call to match the output before running.", call. = FALSE)
#Output
##Tables
###Observations:
#                             AttributionScore_IncluSelf_Revised_Max
#DepositoryAbbrvOtherSpecified  0  1  2
#                           DB  7  1  0
#                           E   6 22  0
#                           G   8 25 16
#                           O  32 93  0
#                           T   1  3  1
#                           U  23  5  0
##ANOVA
###Interpretation:
#                Wald Statistics          Response: AttributionScore_IncluSelf_Revised_Max
# Factor                        Chi-Square d.f. P
# DepositoryAbbrvOtherSpecified 52.89      5    <.0001
# TOTAL                         52.89      5    <.0001
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      2e-10      74.08          5          0      0.743      0.486      0.656      0.253      0.321      0.172
#                                Coef    S.E.  Wald Z P
#y>=1                            -1.9485 1.069 -1.82  0.0683
#y>=2                            -6.0997 1.127 -5.41  0.0000
#DepositoryAbbrvOtherSpecified=E  3.0483 1.143  2.67  0.0077
#DepositoryAbbrvOtherSpecified=G  4.9165 1.159  4.24  0.0000
#DepositoryAbbrvOtherSpecified=O  2.8714 1.086  2.65  0.0082
#DepositoryAbbrvOtherSpecified=T  4.0241 1.561  2.58  0.0099
#DepositoryAbbrvOtherSpecified=U  0.4183 1.177  0.36  0.7223
##Diagnostics
###Interpretation:
###Plot File Name:   AttribReuseDeposSpecific.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Draw the s18 logit plot for the saved figure (summary columns 1-3, plotting symbols 1-3)
plot(
  s18,
  which = seq_len(3),
  pch = seq_len(3),
  xlab = 'logit',
  main = ' ',
  xlim = c(-2.7, 1.8)
)
#AttributionScore_IncluSelf_Revised_Max    N=243
#+-----------------------------+--+---+----+----------+----------+
#|                             |  |N  |Y>=0|Y>=1      |Y>=2      |
#+-----------------------------+--+---+----+----------+----------+
#|DepositoryAbbrvOtherSpecified|DB|  8|Inf |-1.9459101|      -Inf|
#|                             |E | 28|Inf | 1.2992830|      -Inf|
#|                             |G | 49|Inf | 1.6341305|-0.7239188|
#|                             |O |125|Inf | 1.0668636|      -Inf|
#|                             |T |  5|Inf | 1.3862944|-1.3862944|
#|                             |U | 28|Inf |-1.5260563|      -Inf|
#+-----------------------------+--+---+----+----------+----------+
#|Overall                      |  |243|Inf | 0.7681824|-2.5873217|
#+-----------------------------+--+---+----+----------+----------+


###1.exploratory
##1b = univariate of each individually with specific factors
###19:Attrib and Dataset Type (original, as extracted)
#Attach the reuse data set so the bare column names below resolve against it
attach(AllReuse)
str(AllReuse)
#Data Type -specific  --> question =  are certain data sets better cited? GS suspected
xtabs(~ TypeOfDataset + AttributionScore_IncluSelf_Revised_Max)
#Ordinal Regression
ddist19 <- datadist(TypeOfDataset) #commas, not plus signs for multiple covariates
options(datadist = 'ddist19')
ologit19 <- lrm(AttributionScore_IncluSelf_Revised_Max ~ TypeOfDataset, data = AllReuse, na.action = na.pass)
print(ologit19)
anova(ologit19)
#Diagnostics
#Manual checkpoint: halts a sourced/batch run here on purpose.
#modify to match output before running--> Number of Y repeats
stop("Stop! Modify sf19 to match the number of Y levels in the output before running.", call. = FALSE)
#sf19: logit (qlogis) of the proportion of observations at or above each response level
sf19 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
#Apply sf19 within each TypeOfDataset level (uses the attached AllReuse columns)
s19 <- summary(AttributionScore_IncluSelf_Revised_Max ~ TypeOfDataset, fun = sf19)
s19
#Manual checkpoint: modify to match output before running -->which, xlim
stop("Stop! Adjust 'which' and 'xlim' in the plot call to match the output before running.", call. = FALSE)
#Output
##Tables
###Observations:
#             AttributionScore_IncluSelf_Revised_Max
#TypeOfDataset  0  1  2
#          Bio 11 36  0
#          EA  18 14  0
#          Eco  6  9  0
#          GA   2  4  0
#          GIS 10  4  0
#          GO   2  3  0
#          GS  16 53 16
#          PA   7 10  0
#          PT   2 14  1
#          XY   3  2  0
##ANOVA
###Interpretation:
#                Wald Statistics          Response: AttributionScore_IncluSelf_Revised_Max
# Factor        Chi-Square d.f. P
# TypeOfDataset 37.74      9    <.0001
# TOTAL         37.74      9    <.0001
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      2e-07      43.61          9          0       0.72       0.44      0.541      0.229      0.201      0.188
#                  Coef    S.E.   Wald Z P
#y>=1               0.9631 0.3021  3.19  0.0014
#y>=2              -2.8416 0.3913 -7.26  0.0000
#TypeOfDataset=EA  -1.2439 0.4630 -2.69  0.0072
#TypeOfDataset=Eco -0.6323 0.5876 -1.08  0.2819
#TypeOfDataset=GA  -0.3827 0.8624 -0.44  0.6572
#TypeOfDataset=GIS -1.8916 0.6622 -2.86  0.0043
#TypeOfDataset=GO  -0.6323 0.9255 -0.68  0.4945
#TypeOfDataset=GS   0.9392 0.4039  2.33  0.0201
#TypeOfDataset=PA  -0.6760 0.5610 -1.20  0.2282
#TypeOfDataset=PT   0.6923 0.6121  1.13  0.2580
#TypeOfDataset=XY  -1.3924 0.9533 -1.46  0.1441
##Diagnostics
###Interpretation:
###Plot File Name:   AttribReuseDatatypeSpecific.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Draw the s19 logit plot for the saved figure (summary columns 1-3, plotting symbols 1-3)
plot(
  s19,
  which = seq_len(3),
  pch = seq_len(3),
  xlab = 'logit',
  main = ' ',
  xlim = c(-2.9, 2.2)
)
#AttributionScore_IncluSelf_Revised_Max    N=243
#+-------------+---+---+----+----------+---------+
#|             |   |N  |Y>=0|Y>=1      |Y>=2     |
#+-------------+---+---+----+----------+---------+
#|TypeOfDataset|Bio| 47|Inf | 1.1856237|     -Inf|
#|             |EA | 32|Inf |-0.2513144|     -Inf|
#|             |Eco| 15|Inf | 0.4054651|     -Inf|
#|             |GA |  6|Inf | 0.6931472|     -Inf|
#|             |GIS| 14|Inf |-0.9162907|     -Inf|
#|             |GO |  5|Inf | 0.4054651|     -Inf|
#|             |GS | 85|Inf | 1.4615178|-1.461518|
#|             |PA | 17|Inf | 0.3566749|     -Inf|
#|             |PT | 17|Inf | 2.0149030|-2.772589|
#|             |XY |  5|Inf |-0.4054651|     -Inf|
#+-------------+---+---+----+----------+---------+
#|Overall      |   |243|Inf | 0.7681824|-2.587322|
#+-------------+---+---+----+----------+---------+


###1.exploratory
##1b = univariate of each individually with specific factors
###12:Attrib and Open Access
#Attach the reuse data set so the bare column names below resolve against it
attach(AllReuse)
str(AllReuse)
#OA - specific (sub vs. OA)   --> question =  does reuse/sharing quality increase with ARTICLE level open access?
xtabs(~ Open.Access + AttributionScore_IncluSelf_Revised_Max)
#Ordinal Regression
ddist12 <- datadist(Open.Access) #commas, not plus signs for multiple covariates
options(datadist = 'ddist12')
ologit12 <- lrm(AttributionScore_IncluSelf_Revised_Max ~ Open.Access, data = AllReuse, na.action = na.pass)
print(ologit12)
anova(ologit12)
#Diagnostics
#Manual checkpoint: halts a sourced/batch run here on purpose.
#modify to match output before running--> Number of Y repeats
stop("Stop! Modify sf12 to match the number of Y levels in the output before running.", call. = FALSE)
#sf12: logit (qlogis) of the proportion of observations at or above each response level
sf12 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
#Apply sf12 within each Open.Access level (uses the attached AllReuse columns)
s12 <- summary(AttributionScore_IncluSelf_Revised_Max ~ Open.Access, fun = sf12)
s12
#Manual checkpoint: modify to match output before running -->which, xlim
stop("Stop! Adjust 'which' and 'xlim' in the plot call to match the output before running.", call. = FALSE)
#Output
##Tables
###Observations:
#           AttributionScore_IncluSelf_Revised_Max
#Open.Access   0   1   2
#          0  71 140  17
#          1   6   9   0
##ANOVA
###Interpretation:
#                Wald Statistics          Response: AttributionScore_IncluSelf_Revised_Max
# Factor      Chi-Square d.f. P
# Open.Access 0.97       1    0.3242
# TOTAL       0.97       1    0.3242
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      6e-10       0.96          1     0.3269      0.515       0.03      0.263      0.016      0.005      0.216
#            Coef    S.E.   Wald Z P
#y>=1         0.8034 0.1431   5.61 0.0000
#y>=2        -2.5605 0.2528 -10.13 0.0000
#Open.Access -0.5079 0.5152  -0.99 0.3242
##Diagnostics
###Interpretation:
###Plot File Name:   AttribReuseOA.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Plot the per-level logits stored in s12 (saved as AttribReuseOA.jpg)
plot(
  s12,
  which = 1:3,
  pch = 1:3,
  xlab = "logit",
  main = " ",
  xlim = c(-2.6, 1)
)
#+-----------+---+---+----+---------+---------+
#|           |   |N  |Y>=0|Y>=1     |Y>=2     |
#+-----------+---+---+----+---------+---------+
#|Open.Access|No |228|Inf |0.7935659|-2.518645|
#|           |Yes| 15|Inf |0.4054651|     -Inf|
#+-----------+---+---+----+---------+---------+
#|Overall    |   |243|Inf |0.7681824|-2.587322|
#+-----------+---+---+----+---------+---------+



#ideal--------------------------------------------------------------------------------------------------------------------------------------------------------
###1.exploratory
##1b = univariate of each individually with specific factors
###26:Ideal and Journal
#Section 26: univariate ordinal regression of the ideal citation score on Journal.
#attach() lets the bare column names below resolve in xtabs(), datadist() and summary().
attach(AllReuse)
str(AllReuse)
#Journal - specific --> question = policy (does a journal's unique data policy influence reuse/sharing?)
xtabs(~ Journal + Ideal_CitationScoreSimpleREVISED_Max)
#Ordinal Regression
ddist26 <- datadist(Journal) #commas, not plus signs for multiple covariates
options(datadist='ddist26')
ologit26 <- lrm(Ideal_CitationScoreSimpleREVISED_Max ~ Journal, data=AllReuse, na.action=na.pass)
print(ologit26)
anova(ologit26)
#Diagnostics
#Intentional halt: adjust the number of Y>=j terms in sf26 to match the response levels before continuing.
#Written as stop() so the guard still interrupts a run but the file remains parseable.
stop("Stop! modify to match output before running --> Number of Y repeats")
#sf26 returns the logit of the proportion of responses at or above each cutpoint (0, 1, 2).
sf26 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
s26 <- summary(Ideal_CitationScoreSimpleREVISED_Max ~ Journal, fun=sf26)
s26
#Intentional halt: set which/xlim in the plot call for this section before running it.
stop("Stop! modify to match output before running --> which, xlim")
#Output
##Tables
###Observations:
#       Ideal_CitationScoreSimpleREVISED_Max
#Journal  0  1  2
#    AN  51  9  0
#    EC  15  2  0
#    GCB 28  1  0
#    ME  10  5  6
#    PB  21  4  0
#    SB  57 24 10
##ANOVA
###Interpretation:
#                Wald Statistics          Response: Ideal_CitationScoreSimpleREVISED_Max
# Factor     Chi-Square d.f. P
# Journal    27.44      5    <.0001
# TOTAL      27.44      5    <.0001
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      1e-06      35.01          5          0      0.717      0.435      0.577      0.175      0.177      0.166
#            Coef     S.E.   Wald Z P
#y>=1        -1.77023 0.3600 -4.92  0.0000
#y>=2        -3.47938 0.4319 -8.06  0.0000
#Journal=EC  -0.27097 0.8325 -0.33  0.7448
#Journal=GCB -1.56861 1.0793 -1.45  0.1461
#Journal=ME   2.11148 0.5650  3.74  0.0002
#Journal=PB   0.07327 0.6511  0.11  0.9104
#Journal=SB   1.27274 0.4189  3.04  0.0024
##Diagnostics
###Interpretation:
###Plot File Name:   IdealJournalSpecific.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Plot the per-level logits stored in s26 (saved as IdealJournalSpecific.jpg)
plot(
  s26,
  which = 1:3,
  pch = 1:3,
  xlab = "logit",
  main = " ",
  xlim = c(-3.5, 1)
)
#Ideal_CitationScoreSimpleREVISED_Max    N=243
#+-------+---+---+----+-----------+----------+
#|       |   |N  |Y>=0|Y>=1       |Y>=2      |
#+-------+---+---+----+-----------+----------+
#|Journal|AN | 60|Inf |-1.73460106|      -Inf|
#|       |EC | 17|Inf |-2.01490302|      -Inf|
#|       |GCB| 29|Inf |-3.33220451|      -Inf|
#|       |ME | 21|Inf | 0.09531018|-0.9162907|
#|       |PB | 25|Inf |-1.65822808|      -Inf|
#|       |SB | 91|Inf |-0.51669074|-2.0918641|
#+-------+---+---+----+-----------+----------+
#|Overall|   |243|Inf |-1.09313282|-2.6523613|
#+-------+---+---+----+-----------+----------+


##1b = univariate of each individually with specific factors
###27:Ideal and Year (code, not sequential--->TRY sequential with NumCode or straight years)
#Section 27: univariate ordinal regression of the ideal citation score on YearCode.
#attach() lets the bare column names below resolve in xtabs(), datadist() and summary().
attach(AllReuse)
str(AllReuse)
#Year - specific    --> question = time
xtabs(~ YearCode + Ideal_CitationScoreSimpleREVISED_Max)
#Ordinal Regression
ddist27 <- datadist(YearCode) #commas, not plus signs for multiple covariates
options(datadist='ddist27')
ologit27 <- lrm(Ideal_CitationScoreSimpleREVISED_Max ~ YearCode, data=AllReuse, na.action=na.pass)
print(ologit27)
anova(ologit27)
#Diagnostics
#Intentional halt: adjust the number of Y>=j terms in sf27 to match the response levels before continuing.
#Written as stop() so the guard still interrupts a run but the file remains parseable.
stop("Stop! modify to match output before running --> Number of Y repeats")
#sf27 returns the logit of the proportion of responses at or above each cutpoint (0, 1, 2).
sf27 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
s27 <- summary(Ideal_CitationScoreSimpleREVISED_Max ~ YearCode, fun=sf27)
s27
#Intentional halt: set which/xlim in the plot call for this section before running it.
stop("Stop! modify to match output before running --> which, xlim")
#Output
##Tables
###Observations:
#        Ideal_CitationScoreSimpleREVISED_Max
#YearCode  0  1  2
#   Y2000 34  9  4
#   Y2005 14  4  2
#   Y2006 10  2  1
#   Y2007 15  4  0
#   Y2008 16  2  0
#   Y2009 15  6  1
#   Y2010 78 18  8
##ANOVA
###Interpretation:
#                Wald Statistics          Response: Ideal_CitationScoreSimpleREVISED_Max
# Factor     Chi-Square d.f. P
# YearCode   2.99       6    0.8096
# TOTAL      2.99       6    0.8096
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      3e-08       3.56          6     0.7363      0.561      0.121      0.166      0.049      0.019      0.186
#               Coef    S.E.   Wald Z P
#y>=1           -0.9457 0.3241 -2.92  0.0035
#y>=2           -2.5169 0.3861 -6.52  0.0000
#YearCode=Y2005  0.1241 0.5816  0.21  0.8310
#YearCode=Y2006 -0.2346 0.7310 -0.32  0.7483
#YearCode=Y2007 -0.4398 0.6437 -0.68  0.4944
#YearCode=Y2008 -1.1617 0.8150 -1.43  0.1540
#YearCode=Y2009  0.1241 0.5522  0.22  0.8222
#YearCode=Y2010 -0.1368 0.3940 -0.35  0.7284
##Diagnostics
###Interpretation:
###Plot File Name:   IdealReuseYearCode.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Plot the per-level logits stored in s27 (saved as IdealReuseYearCode.jpg)
plot(
  s27,
  which = 1:3,
  pch = 1:3,
  xlab = "logit",
  main = " ",
  xlim = c(-3.2, -0)
)
#Ideal_CitationScoreSimpleREVISED_Max    N=243
#+--------+-----+---+----+----------+---------+
#|        |     |N  |Y>=0|Y>=1      |Y>=2     |
#+--------+-----+---+----+----------+---------+
#|YearCode|Y2000| 47|Inf |-0.9614112|-2.374906|
#|        |Y2005| 20|Inf |-0.8472979|-2.197225|
#|        |Y2006| 13|Inf |-1.2039728|-2.484907|
#|        |Y2007| 19|Inf |-1.3217558|     -Inf|
#|        |Y2008| 18|Inf |-2.0794415|     -Inf|
#|        |Y2009| 22|Inf |-0.7621401|-3.044522|
#|        |Y2010|104|Inf |-1.0986123|-2.484907|
#+--------+-----+---+----+----------+---------+
#|Overall |     |243|Inf |-1.0931328|-2.652361|
#+--------+-----+---+----+----------+---------+


###1.exploratory
##1b = univariate of each individually with specific factors
###28:Ideal and Depository
#Section 28: univariate ordinal regression of the ideal citation score on depository.
#attach() lets the bare column names below resolve in xtabs(), datadist() and summary().
attach(AllReuse)
str(AllReuse)
#Depository - specific  --> question = which depository is "best"?
xtabs(~ DepositoryAbbrvOtherSpecified + Ideal_CitationScoreSimpleREVISED_Max)
#Ordinal Regression
ddist28 <- datadist(DepositoryAbbrvOtherSpecified) #commas, not plus signs for multiple covariates
options(datadist='ddist28')
ologit28 <- lrm(Ideal_CitationScoreSimpleREVISED_Max ~ DepositoryAbbrvOtherSpecified, data=AllReuse, na.action=na.pass)
print(ologit28)
anova(ologit28)
#Diagnostics
#Intentional halt: adjust the number of Y>=j terms in sf28 to match the response levels before continuing.
#Written as stop() so the guard still interrupts a run but the file remains parseable.
stop("Stop! modify to match output before running --> Number of Y repeats")
#sf28 returns the logit of the proportion of responses at or above each cutpoint (0, 1, 2).
sf28 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
s28 <- summary(Ideal_CitationScoreSimpleREVISED_Max ~ DepositoryAbbrvOtherSpecified, fun=sf28)
s28
#Intentional halt: set which/xlim in the plot call for this section before running it.
stop("Stop! modify to match output before running --> which, xlim")
#Output
##Tables
###Observations:
#                             Ideal_CitationScoreSimpleREVISED_Max
#DepositoryAbbrvOtherSpecified   0   1   2
#                           DB   7   1   0
#                           E   25   3   0
#                           G    8  26  15
#                           O  117   8   0
#                           T    1   3   1
#                           U   24   4   0
##ANOVA
###Interpretation:
#                Wald Statistics          Response: Ideal_CitationScoreSimpleREVISED_Max
# Factor                        Chi-Square d.f. P
# DepositoryAbbrvOtherSpecified 84.11      5    <.0001
# TOTAL                         84.11      5    <.0001
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      8e-08     126.77          5          0      0.857      0.715      0.846      0.287      0.537       0.09
#                                Coef    S.E.  Wald Z P
#y>=1                            -1.9567 1.068 -1.83  0.0669
#y>=2                            -4.6584 1.134 -4.11  0.0000
#DepositoryAbbrvOtherSpecified=E -0.1725 1.230 -0.14  0.8885
#DepositoryAbbrvOtherSpecified=G  3.7556 1.133  3.31  0.0009
#DepositoryAbbrvOtherSpecified=O -0.7309 1.128 -0.65  0.5172
#DepositoryAbbrvOtherSpecified=T  3.3075 1.393  2.37  0.0176
#DepositoryAbbrvOtherSpecified=U  0.1521 1.196  0.13  0.8988
##Diagnostics
###Interpretation:
###Plot File Name:   IdealDepoSpecific.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Plot the per-level logits stored in s28 (saved as IdealDepoSpecific.jpg)
plot(
  s28,
  which = 1:3,
  pch = 1:3,
  xlab = "logit",
  main = " ",
  xlim = c(-2.8, 1.8)
)
#Ideal_CitationScoreSimpleREVISED_Max    N=243
#+-----------------------------+--+---+----+---------+----------+
#|                             |  |N  |Y>=0|Y>=1     |Y>=2      |
#+-----------------------------+--+---+----+---------+----------+
#|DepositoryAbbrvOtherSpecified|DB|  8|Inf |-1.945910|      -Inf|
#|                             |E | 28|Inf |-2.120264|      -Inf|
#|                             |G | 49|Inf | 1.634131|-0.8183103|
#|                             |O |125|Inf |-2.682732|      -Inf|
#|                             |T |  5|Inf | 1.386294|-1.3862944|
#|                             |U | 28|Inf |-1.791759|      -Inf|
#+-----------------------------+--+---+----+---------+----------+
#|Overall                      |  |243|Inf |-1.093133|-2.6523613|
#+-----------------------------+--+---+----+---------+----------+


###1.exploratory
##1b = univariate of each individually with specific factors
###29:Ideal and Dataset Type (original, as extracted)
#Section 29: univariate ordinal regression of the ideal citation score on TypeOfDataset.
#attach() lets the bare column names below resolve in xtabs(), datadist() and summary().
attach(AllReuse)
str(AllReuse)
#Data Type -specific  --> question =  are certain data sets better cited? GS suspected
xtabs(~ TypeOfDataset + Ideal_CitationScoreSimpleREVISED_Max)
#Ordinal Regression
ddist29 <- datadist(TypeOfDataset) #commas, not plus signs for multiple covariates
options(datadist='ddist29')
ologit29 <- lrm(Ideal_CitationScoreSimpleREVISED_Max ~ TypeOfDataset, data=AllReuse, na.action=na.pass)
print(ologit29)
anova(ologit29)
#Diagnostics
#Intentional halt: adjust the number of Y>=j terms in sf29 to match the response levels before continuing.
#Written as stop() so the guard still interrupts a run but the file remains parseable.
stop("Stop! modify to match output before running --> Number of Y repeats")
#sf29 returns the logit of the proportion of responses at or above each cutpoint (0, 1, 2).
sf29 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
s29 <- summary(Ideal_CitationScoreSimpleREVISED_Max ~ TypeOfDataset, fun=sf29)
s29
#Intentional halt: set which/xlim in the plot call for this section before running it.
stop("Stop! modify to match output before running --> which, xlim")
#Output
##Tables
###Observations:
#             Ideal_CitationScoreSimpleREVISED_Max
#TypeOfDataset  0  1  2
#          Bio 44  3  0
#          EA  30  2  0
#          Eco 13  2  0
#          GA   4  2  0
#          GIS 12  2  0
#          GO   5  0  0
#          GS  43 27 15
#          PA  15  2  0
#          PT  14  2  1
#          XY   2  3  0
##ANOVA
###Interpretation:
#                Wald Statistics          Response: Ideal_CitationScoreSimpleREVISED_Max
# Factor        Chi-Square d.f. P
# TypeOfDataset 41.16      9    <.0001
# TOTAL         41.16      9    <.0001
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      0.003      55.63          9          0       0.77      0.539      0.707      0.217       0.27      0.149
#                  Coef    S.E.    Wald Z P
#y>=1              -2.6974  0.5963 -4.52  0.0000
#y>=2              -4.4969  0.6472 -6.95  0.0000
#TypeOfDataset=EA  -0.0222  0.9425 -0.02  0.9812
#TypeOfDataset=Eco  0.7975  0.9637  0.83  0.4079
#TypeOfDataset=GA   1.8985  1.0314  1.84  0.0657
#TypeOfDataset=GIS  0.8749  0.9667  0.91  0.3654
#TypeOfDataset=GO  -4.8647 19.6347 -0.25  0.8043
#TypeOfDataset=GS   2.7432  0.6345  4.32  0.0000
#TypeOfDataset=PA   0.6584  0.9589  0.69  0.4923
#TypeOfDataset=PT   1.1871  0.8718  1.36  0.1733
#TypeOfDataset=XY   2.7432  0.9866  2.78  0.0054
##Diagnostics
###Interpretation:
###Plot File Name:   IdealDatatypeSpecific.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Plot the per-level logits stored in s29 (saved as IdealDatatypeSpecific.jpg)
plot(
  s29,
  which = 1:3,
  pch = 1:3,
  xlab = "logit",
  main = " ",
  xlim = c(-2.9, .6)
)
#Ideal_CitationScoreSimpleREVISED_Max    N=243
#+-------------+---+---+----+-----------+---------+
#|             |   |N  |Y>=0|Y>=1       |Y>=2     |
#+-------------+---+---+----+-----------+---------+
#|TypeOfDataset|Bio| 47|Inf |-2.68557735|     -Inf|
#|             |EA | 32|Inf |-2.70805020|     -Inf|
#|             |Eco| 15|Inf |-1.87180218|     -Inf|
#|             |GA |  6|Inf |-0.69314718|     -Inf|
#|             |GIS| 14|Inf |-1.79175947|     -Inf|
#|             |GO |  5|Inf |       -Inf|     -Inf|
#|             |GS | 85|Inf |-0.02353050|-1.540445|
#|             |PA | 17|Inf |-2.01490302|     -Inf|
#|             |PT | 17|Inf |-1.54044504|-2.772589|
#|             |XY |  5|Inf | 0.40546511|     -Inf|
#+-------------+---+---+----+-----------+---------+
#|Overall      |   |243|Inf |-1.09313282|-2.652361|
#+-------------+---+---+----+-----------+---------+


###1.exploratory
##1b = univariate of each individually with specific factors
###20:Ideal and Open Access
#Section 20: univariate ordinal regression of the ideal citation score on Open.Access.
#attach() lets the bare column names below resolve in xtabs(), datadist() and summary().
attach(AllReuse)
str(AllReuse)
#OA - specific (sub vs. OA)   --> question =  does reuse/sharing quality increase with ARTICLE level open access?
xtabs(~ Open.Access + Ideal_CitationScoreSimpleREVISED_Max)
#Ordinal Regression
ddist20 <- datadist(Open.Access) #commas, not plus signs for multiple covariates
options(datadist='ddist20')
ologit20 <- lrm(Ideal_CitationScoreSimpleREVISED_Max ~ Open.Access, data=AllReuse, na.action=na.pass)
print(ologit20)
anova(ologit20)
#Diagnostics
#Intentional halt: adjust the number of Y>=j terms in sf20 to match the response levels before continuing.
#Written as stop() so the guard still interrupts a run but the file remains parseable.
stop("Stop! modify to match output before running --> Number of Y repeats")
#sf20 returns the logit of the proportion of responses at or above each cutpoint (0, 1, 2).
sf20 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
s20 <- summary(Ideal_CitationScoreSimpleREVISED_Max ~ Open.Access, fun=sf20)
s20
#Intentional halt: set which/xlim in the plot call for this section before running it.
stop("Stop! modify to match output before running --> which, xlim")
#Output
##Tables
###Observations:
#           Ideal_CitationScoreSimpleREVISED_Max
#Open.Access   0   1   2
#          0 170  42  16
#          1  12   3   0
##ANOVA
###Interpretation:
#                Wald Statistics          Response: Ideal_CitationScoreSimpleREVISED_Max
# Factor      Chi-Square d.f. P
# Open.Access 0.33       1    0.5677
# TOTAL       0.33       1    0.5677
##Regression
###Interpretation:
#       Obs  Max Deriv Model L.R.       d.f.          P          C        Dxy      Gamma      Tau-a         R2      Brier
#       243      9e-08       0.35          1     0.5542       0.51       0.02      0.187      0.008      0.002      0.188
#            Coef    S.E.   Wald Z P
#y>=1        -1.0710 0.1521  -7.04 0.0000
#y>=2        -2.6315 0.2607 -10.09 0.0000
#Open.Access -0.3753 0.6567  -0.57 0.5677
##Diagnostics
###Interpretation:
###Plot File Name:   IdealReuseOA.jpg (folder: StatsOutput-->Proportional Odds Plots)
#Plot the per-level logits stored in s20 (saved as IdealReuseOA.jpg)
plot(
  s20,
  which = 1:3,
  pch = 1:3,
  xlab = "logit",
  main = " ",
  xlim = c(-2.7, -0.9)
)
#Ideal_CitationScoreSimpleREVISED_Max    N=243
#+-----------+---+---+----+---------+---------+
#|           |   |N  |Y>=0|Y>=1     |Y>=2     |
#+-----------+---+---+----+---------+---------+
#|Open.Access|No |228|Inf |-1.075355|-2.583998|
#|           |Yes| 15|Inf |-1.386294|     -Inf|
#+-----------+---+---+----+---------+---------+
#|Overall    |   |243|Inf |-1.093133|-2.652361|
#+-----------+---+---+----+---------+---------+



#RESUME TXFR HERE**********************************************************************************************************************************
###sharing---------------------------------------------------------------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------------------------------------------------------------------------
#Journal - specific --> question = policy (does a journal's unique data policy influence reuse/sharing?)
xtabs(~ Journal + ResolvableScoreRevised_Max)
#copy all below for each factor
###1.exploratory
##1b = univariate of each individually with specific factors
###6:Resolve and Journal
#attach() lets the bare column names below resolve in xtabs(), datadist() and summary().
attach(AllReuse)
str(AllReuse)
#Journal - specific --> question = policy (does a journal's unique data policy influence reuse/sharing?)
xtabs(~ Journal + ResolvableScoreRevised_Max)
#Ordinal Regression
ddist6 <- datadist(Journal) #commas, not plus signs for multiple covariates
options(datadist='ddist6')
ologit6 <- lrm(ResolvableScoreRevised_Max ~ Journal, data=AllReuse, na.action=na.pass)
print(ologit6)
anova(ologit6)
#Diagnostics
#Intentional halt: adjust the number of Y>=j terms in sf6 to match the response levels before continuing.
#Written as stop() so the guard still interrupts a run but the file remains parseable.
stop("Stop! modify to match output before running --> Number of Y repeats")
#sf6 returns the logit of the proportion of responses at or above each cutpoint (0, 1, 2).
sf6 <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
s6 <- summary(ResolvableScoreRevised_Max ~ Journal, fun=sf6)
s6
#Intentional halt: set which/xlim in the plot call below before running it.
stop("Stop! modify to match output before running --> which, xlim")
plot(s6, which=1:3, pch=1:3, xlab='logit', main=' ', xlim=c(-3.5,3.0))
#Output
##Tables
###Observations:
##ANOVA
###Interpretation:
##Regression
###Interpretation:
##Diagnostics
###Interpretation:
###Plot File Name:   .jpg (folder: StatsOutput-->Proportional Odds Plots)
#NOTE(review): the template plotted `s`, which is not this section's summary; pointed at s6 with the
#xlim used above. Drop this call if the plot above is the one to keep.
plot(s6, which=1:3, pch=1:3, xlab='logit', main=' ', xlim=c(-3.5,3.0))

#Cross-tabulations of the resolvable score against the remaining specific factors.
#Year - specific    --> question = time
xtabs(~ YearCode + ResolvableScoreRevised_Max)
#Depository - specific  --> question = which depository is "best"?
xtabs(~ DepositoryAbbrvOtherSpecified + ResolvableScoreRevised_Max)
#Data Type -specific  --> question =  are certain data sets better cited? GS suspected
xtabs(~ TypeOfDataset + ResolvableScoreRevised_Max)
#OA - specific (sub vs. OA)   --> question =  does reuse/sharing increase with ARTICLE level open access?
xtabs(~ Open.Access + ResolvableScoreRevised_Max)
#Citations - continuous (sub vs. OA)   --> question =  does reuse/sharing increase # citing articles? _!need to normalize this to per year average!
#TODO: the citation-count column has not been chosen yet (the placeholder was "Journal#?").
#Kept commented out: the inline "#" hid the closing parenthesis and left the xtabs( call unterminated.
#xtabs(~ <citation count column> + ResolvableScoreRevised_Max)

###1.exploratory
##1b = univariate of each individually with possibly interesting broad factors
#Cross-tabulations of the resolvable score against the broad factors.
#Journal - continuous (impact factor)  --> question =   does higher impact factor influence reuse/sharing quality?
xtabs(~ ImpactFactor + ResolvableScoreRevised_Max)
#Journal - broad (discipline_gut) --> question = discipline (scientist attitude within)
xtabs(~ Discipline_Gut + ResolvableScoreRevised_Max)
#Data Type - broad (genre) --> question = does the inherent type of data (unit of analysis) influence citation quality?
#_less confounding with depository, fewer degrees of freedom
xtabs(~ DataGenre + ResolvableScoreRevised_Max)
#Data Type - broad (data-defined discipline) --> question =   discipline (scientist attitude within)
#_difficult to define each paper by its discipline, so why not use the datatype?!
xtabs(~ DataDefinedDiscipline + ResolvableScoreRevised_Max)

###-------------------------------------------------------------------------------------------------------------------------------------------------------
###---------------------------------------------------------------------------------------------------------------------------------------------------
### 2. multivar with relevant factors  (specific states) ----------------------------------------------------------------------------------------------
#potential interactions
#1 journal vs. data type vs. depository - which is more important?
#above with year and other relevant (journal/article factors)
#sig from #1 with year
#Multivariate ordinal regression of the resolvable score on journal, year, data-defined discipline and depository use.
ddist <- datadist(Journal,YearCode,DataDefinedDiscipline,DepositoryYNandNot) #commas, not plus signs for multiple covariates
options(datadist='ddist')
#NOTE(review): the model is fitted on TimeReuse, while datadist() above and summary() below read columns from the
#search path (AllReuse is the last data frame attached in the visible code) - confirm which data set is intended.
ologit <- lrm(ResolvableScoreRevised_Max ~ Journal+YearCode+DataDefinedDiscipline+DepositoryYNandNot, data=TimeReuse, na.action=na.pass)
print(ologit)
anova(ologit)
#Intentional halt: adjust the number of Y>=j terms in sf to match the response levels before continuing.
#Written as stop() so the guard still interrupts a run but the file remains parseable.
stop("Stop! modify to match output before running --> Number of Y repeats")
#sf returns the logit of the proportion of responses at or above each cutpoint (0, 1, 2).
sf <- function(y) {
  c('Y>=0' = qlogis(mean(y >= 0)),
    'Y>=1' = qlogis(mean(y >= 1)),
    'Y>=2' = qlogis(mean(y >= 2)))
}
#The diagnostic summary uses the same response and predictors as ologit; the template formula
#(ResolvableScoreRevised ~ Journal+YearCode+DepositoryAbbrv+BroaderDatatypes) described a different model.
s <- summary(ResolvableScoreRevised_Max ~ Journal+YearCode+DataDefinedDiscipline+DepositoryYNandNot, fun=sf)
s
#Intentional halt: set which/xlim in the plot call below before running it.
stop("Stop! modify to match output before running --> which, xlim")
plot(s, which=1:3, pch=1:3, xlab='logit', main=' ', xlim=c(-3.5,2.9))


#3. interaction between reuse and sharing (also make sure this covers (or cover independently)...trends in articles that do vs. those that don't share)--->see #7 above
#4. univariate of dominant suspected trends
#5. sampling artifacts